1 /*
2 * Copyright (c) 2021-2024 Huawei Device Co., Ltd.
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at
6 *
7 * http://www.apache.org/licenses/LICENSE-2.0
8 *
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
14 */
15
16 #include "disassembler.h"
17 #include "class_data_accessor.h"
18 #include "field_data_accessor.h"
19 #include "libpandafile/type_helper.h"
20 #include "mangling.h"
21 #include "utils/logger.h"
22
23 #include <iomanip>
24
25 #include "get_language_specific_metadata.inc"
26
27 namespace ark::disasm {
28
Disassemble(std::string_view filenameIn,const bool quiet,const bool skipStrings)29 void Disassembler::Disassemble(std::string_view filenameIn, const bool quiet, const bool skipStrings)
30 {
31 auto file = panda_file::File::Open(filenameIn);
32 if (file == nullptr) {
33 LOG(FATAL, DISASSEMBLER) << "> unable to open specified pandafile: <" << filenameIn << ">";
34 }
35
36 Disassemble(file, quiet, skipStrings);
37 }
38
Disassemble(const panda_file::File & file,const bool quiet,const bool skipStrings)39 void Disassembler::Disassemble(const panda_file::File &file, const bool quiet, const bool skipStrings)
40 {
41 SetFile(file);
42 DisassembleImpl(quiet, skipStrings);
43 }
44
Disassemble(std::unique_ptr<const panda_file::File> & file,const bool quiet,const bool skipStrings)45 void Disassembler::Disassemble(std::unique_ptr<const panda_file::File> &file, const bool quiet, const bool skipStrings)
46 {
47 SetFile(file);
48 DisassembleImpl(quiet, skipStrings);
49 }
50
DisassembleImpl(const bool quiet,const bool skipStrings)51 void Disassembler::DisassembleImpl(const bool quiet, const bool skipStrings)
52 {
53 prog_ = pandasm::Program {};
54
55 recordNameToId_.clear();
56 methodNameToId_.clear();
57
58 skipStrings_ = skipStrings;
59 quiet_ = quiet;
60
61 progInfo_ = ProgInfo {};
62
63 progAnn_ = ProgAnnotations {};
64
65 GetLiteralArrays();
66 GetRecords();
67
68 AddExternalFieldsToRecords();
69 GetLanguageSpecificMetadata();
70 }
71
SetFile(std::unique_ptr<const panda_file::File> & file)72 void Disassembler::SetFile(std::unique_ptr<const panda_file::File> &file)
73 {
74 fileHolder_.swap(file);
75 file_ = fileHolder_.get();
76 }
77
SetFile(const panda_file::File & file)78 void Disassembler::SetFile(const panda_file::File &file)
79 {
80 fileHolder_.reset();
81 file_ = &file;
82 }
83
SetProfile(std::string_view fname)84 void Disassembler::SetProfile(std::string_view fname)
85 {
86 std::ifstream stm(fname.data(), std::ios::binary);
87 if (!stm.is_open()) {
88 LOG(FATAL, DISASSEMBLER) << "Cannot open profile file";
89 }
90
91 auto res = profiling::ReadProfile(stm, fileLanguage_);
92 if (!res) {
93 LOG(FATAL, DISASSEMBLER) << "Failed to deserialize: " << res.Error();
94 }
95 profile_ = res.Value();
96 }
97
GetInsInfo(panda_file::MethodDataAccessor & mda,const panda_file::File::EntityId & codeId,MethodInfo * methodInfo) const98 void Disassembler::GetInsInfo(panda_file::MethodDataAccessor &mda, const panda_file::File::EntityId &codeId,
99 MethodInfo *methodInfo /* out */) const
100 {
101 const static size_t FORMAT_WIDTH = 20;
102 const static size_t INSTRUCTION_WIDTH = 2;
103
104 panda_file::CodeDataAccessor codeAccessor(*file_, codeId);
105
106 std::string methodName = mda.GetFullName();
107 auto prof = profiling::INVALID_PROFILE;
108 if (profile_ != profiling::INVALID_PROFILE) {
109 prof = profiling::FindMethodInProfile(profile_, fileLanguage_, methodName);
110 }
111
112 auto insSz = codeAccessor.GetCodeSize();
113 auto insArr = codeAccessor.GetInstructions();
114
115 auto bcIns = BytecodeInstruction(insArr);
116 auto bcInsLast = bcIns.JumpTo(insSz);
117
118 while (bcIns.GetAddress() != bcInsLast.GetAddress()) {
119 std::stringstream ss;
120
121 uintptr_t bc = bcIns.GetAddress() - BytecodeInstruction(insArr).GetAddress();
122 ss << "offset: 0x" << std::setfill('0') << std::setw(4U) << std::hex << bc;
123 ss << ", " << std::setfill('.');
124
125 BytecodeInstruction::Format format = bcIns.GetFormat();
126
127 auto formatStr = std::string("[") + BytecodeInstruction::GetFormatString(format) + ']';
128 ss << std::setw(FORMAT_WIDTH) << std::left << formatStr;
129
130 ss << "[";
131
132 const uint8_t *pc = bcIns.GetAddress();
133 const size_t sz = bcIns.GetSize();
134
135 for (size_t i = 0; i < sz; i++) {
136 ss << "0x" << std::setw(INSTRUCTION_WIDTH) << std::setfill('0') << std::right << std::hex
137 << static_cast<int>(pc[i]); // NOLINT(cppcoreguidelines-pro-bounds-pointer-arithmetic)
138
139 if (i != sz - 1) {
140 ss << " ";
141 }
142 }
143
144 ss << "]";
145
146 if (profile_ != profiling::INVALID_PROFILE && prof != profiling::INVALID_PROFILE) {
147 auto profId = bcIns.GetProfileId();
148 if (profId != -1) {
149 ss << ", Profile: ";
150 profiling::DumpProfile(prof, fileLanguage_, &bcIns, ss);
151 }
152 }
153
154 methodInfo->instructionsInfo.push_back(ss.str());
155
156 bcIns = bcIns.GetNext();
157 }
158 }
159
CollectInfo()160 void Disassembler::CollectInfo()
161 {
162 LOG(DEBUG, DISASSEMBLER) << "\n[getting program info]\n";
163
164 debugInfoExtractor_ = std::make_unique<panda_file::DebugInfoExtractor>(file_);
165
166 for (const auto &pair : recordNameToId_) {
167 GetRecordInfo(pair.second, &progInfo_.recordsInfo[pair.first]);
168 }
169
170 for (const auto &pair : methodNameToId_) {
171 GetMethodInfo(pair.second, &progInfo_.methodsInfo[pair.first]);
172 }
173
174 AddExternalFieldsInfoToRecords();
175 }
176
Serialize(std::ostream & os,bool addSeparators,bool printInformation) const177 void Disassembler::Serialize(std::ostream &os, bool addSeparators, bool printInformation) const
178 {
179 if (os.bad()) {
180 LOG(DEBUG, DISASSEMBLER) << "> serialization failed. os bad\n";
181
182 return;
183 }
184
185 SerializeFilename(os);
186 SerializeLanguage(os);
187 SerializeLitArrays(os, addSeparators);
188 SerializeRecords(os, addSeparators, printInformation);
189 SerializeMethods(os, addSeparators, printInformation);
190 }
191
SerializePrintStartInfo(const pandasm::Function & method,std::ostringstream & headerSs) const192 void Disassembler::SerializePrintStartInfo(const pandasm::Function &method, std::ostringstream &headerSs) const
193 {
194 headerSs << ".function " << method.returnType.GetPandasmName() << " " << method.name << "(";
195
196 if (!method.params.empty()) {
197 headerSs << method.params[0].type.GetPandasmName() << " a0";
198
199 for (size_t i = 1; i < method.params.size(); i++) {
200 headerSs << ", " << method.params[i].type.GetPandasmName() << " a" << (size_t)i;
201 }
202 }
203 headerSs << ")";
204 }
205
SerializeCheckEnd(const pandasm::Function & method,std::ostream & os,bool printMethodInfo,const MethodInfo * & methodInfo) const206 void Disassembler::SerializeCheckEnd(const pandasm::Function &method, std::ostream &os, bool printMethodInfo,
207 const MethodInfo *&methodInfo) const
208 {
209 if (!method.catchBlocks.empty()) {
210 os << "\n";
211
212 for (const auto &catchBlock : method.catchBlocks) {
213 Serialize(catchBlock, os);
214 os << "\n";
215 }
216 }
217
218 if (printMethodInfo) {
219 ASSERT(methodInfo != nullptr);
220 SerializeLineNumberTable(methodInfo->lineNumberTable, os);
221 SerializeLocalVariableTable(methodInfo->localVariableTable, method, os);
222 }
223
224 os << "}\n\n";
225 }
226
SerializeIfPrintMethodInfo(const pandasm::Function & method,bool printMethodInfo,std::ostringstream & headerSs,const MethodInfo * & methodInfo,std::map<std::string,ark::disasm::MethodInfo>::const_iterator & methodInfoIt) const227 size_t Disassembler::SerializeIfPrintMethodInfo(
228 const pandasm::Function &method, bool printMethodInfo, std::ostringstream &headerSs, const MethodInfo *&methodInfo,
229 std::map<std::string, ark::disasm::MethodInfo>::const_iterator &methodInfoIt) const
230 {
231 size_t width = 0;
232 if (printMethodInfo) {
233 methodInfo = &methodInfoIt->second;
234
235 for (const auto &i : method.ins) {
236 if (i.ToString().size() > width) {
237 width = i.ToString().size();
238 }
239 }
240
241 headerSs << " # " << methodInfo->methodInfo << "\n# CODE:";
242 }
243
244 headerSs << "\n";
245 return width;
246 }
247
Serialize(const pandasm::Function & method,std::ostream & os,bool printInformation,panda_file::LineNumberTable * lineTable) const248 void Disassembler::Serialize(const pandasm::Function &method, std::ostream &os, bool printInformation,
249 panda_file::LineNumberTable *lineTable) const
250 {
251 std::ostringstream headerSs;
252 SerializePrintStartInfo(method, headerSs);
253 const std::string signature = pandasm::GetFunctionSignatureFromName(method.name, method.params);
254 const auto methodIter = progAnn_.methodAnnotations.find(signature);
255 if (methodIter != progAnn_.methodAnnotations.end()) {
256 Serialize(*method.metadata, methodIter->second, headerSs);
257 } else {
258 Serialize(*method.metadata, {}, headerSs);
259 }
260
261 if (!method.HasImplementation()) {
262 headerSs << "\n\n";
263 os << headerSs.str();
264 return;
265 }
266
267 headerSs << " {";
268
269 const MethodInfo *methodInfo = nullptr;
270 auto methodInfoIt = progInfo_.methodsInfo.find(signature);
271 bool printMethodInfo = printInformation && methodInfoIt != progInfo_.methodsInfo.end();
272 size_t width = SerializeIfPrintMethodInfo(method, printMethodInfo, headerSs, methodInfo, methodInfoIt);
273
274 auto headerSsStr = headerSs.str();
275 size_t lineNumber = std::count(headerSsStr.begin(), headerSsStr.end(), '\n') + 1;
276
277 os << headerSsStr;
278
279 for (size_t i = 0; i < method.ins.size(); i++) {
280 std::ostringstream insSs;
281
282 std::string ins = method.ins[i].ToString("", method.GetParamsNum() != 0, method.regsNum);
283 if (method.ins[i].setLabel) {
284 insSs << ins.substr(0, ins.find(": ")) << ":\n";
285 ins.erase(0, ins.find(": ") + std::string(": ").length());
286 }
287
288 insSs << "\t";
289 if (printMethodInfo) {
290 insSs << std::setw(width) << std::left;
291 }
292 insSs << ins;
293 if (printMethodInfo) {
294 ASSERT(methodInfo != nullptr);
295 insSs << " # " << methodInfo->instructionsInfo[i];
296 }
297 insSs << "\n";
298
299 auto insSsStr = insSs.str();
300 lineNumber += std::count(insSsStr.begin(), insSsStr.end(), '\n');
301
302 if (lineTable != nullptr) {
303 lineTable->emplace_back(
304 panda_file::LineTableEntry {static_cast<uint32_t>(method.ins[i].insDebug.boundLeft), lineNumber - 1});
305 }
306
307 os << insSsStr;
308 }
309
310 SerializeCheckEnd(method, os, printMethodInfo, methodInfo);
311 }
312
IsSystemType(const std::string & typeName)313 inline bool Disassembler::IsSystemType(const std::string &typeName)
314 {
315 bool isArrayType = typeName.back() == ']';
316 bool isGlobal = typeName == "_GLOBAL";
317
318 return isArrayType || isGlobal;
319 }
320
GetRecord(pandasm::Record & record,const panda_file::File::EntityId & recordId)321 void Disassembler::GetRecord(pandasm::Record &record, const panda_file::File::EntityId &recordId)
322 {
323 LOG(DEBUG, DISASSEMBLER) << "\n[getting record]\nid: " << recordId << " (0x" << std::hex << recordId << ")";
324
325 record.name = GetFullRecordName(recordId);
326
327 LOG(DEBUG, DISASSEMBLER) << "name: " << record.name;
328
329 GetMetaData(&record, recordId);
330
331 if (!file_->IsExternal(recordId)) {
332 GetMethods(recordId);
333 GetFields(record, recordId);
334 }
335 }
336
AddMethodToTables(const panda_file::File::EntityId & methodId)337 void Disassembler::AddMethodToTables(const panda_file::File::EntityId &methodId)
338 {
339 pandasm::Function newMethod("", fileLanguage_);
340 GetMethod(&newMethod, methodId);
341
342 const auto signature = pandasm::GetFunctionSignatureFromName(newMethod.name, newMethod.params);
343 if (prog_.functionTable.find(signature) != prog_.functionTable.end()) {
344 return;
345 }
346
347 methodNameToId_.emplace(signature, methodId);
348 prog_.functionSynonyms[newMethod.name].push_back(signature);
349 prog_.functionTable.emplace(signature, std::move(newMethod));
350 }
351
GetMethod(pandasm::Function * method,const panda_file::File::EntityId & methodId)352 void Disassembler::GetMethod(pandasm::Function *method, const panda_file::File::EntityId &methodId)
353 {
354 LOG(DEBUG, DISASSEMBLER) << "\n[getting method]\nid: " << methodId << " (0x" << std::hex << methodId << ")";
355
356 if (method == nullptr) {
357 LOG(ERROR, DISASSEMBLER) << "> nullptr recieved, but method ptr expected!";
358
359 return;
360 }
361
362 panda_file::MethodDataAccessor methodAccessor(*file_, methodId);
363
364 method->name = GetFullMethodName(methodId);
365
366 LOG(DEBUG, DISASSEMBLER) << "name: " << method->name;
367
368 GetParams(method, methodAccessor.GetProtoId());
369 GetMetaData(method, methodId);
370
371 if (!method->HasImplementation()) {
372 return;
373 }
374
375 if (methodAccessor.GetCodeId().has_value()) {
376 const IdList idList = GetInstructions(method, methodId, methodAccessor.GetCodeId().value());
377
378 for (const auto &id : idList) {
379 AddMethodToTables(id);
380 }
381 } else {
382 LOG(ERROR, DISASSEMBLER) << "> error encountered at " << methodId << " (0x" << std::hex << methodId
383 << "). implementation of method expected, but no \'CODE\' tag was found!";
384
385 return;
386 }
387 }
388
389 template <typename T>
FillLiteralArrayData(pandasm::LiteralArray * litArray,const panda_file::LiteralTag & tag,const panda_file::LiteralDataAccessor::LiteralValue & value) const390 void Disassembler::FillLiteralArrayData(pandasm::LiteralArray *litArray, const panda_file::LiteralTag &tag,
391 const panda_file::LiteralDataAccessor::LiteralValue &value) const
392 {
393 panda_file::File::EntityId id(std::get<uint32_t>(value));
394 auto sp = file_->GetSpanFromId(id);
395 auto len = panda_file::helpers::Read<sizeof(uint32_t)>(&sp);
396 if (tag != panda_file::LiteralTag::ARRAY_STRING) {
397 for (size_t i = 0; i < len; i++) {
398 pandasm::LiteralArray::Literal lit;
399 lit.tag = tag;
400 lit.value = bit_cast<T>(panda_file::helpers::Read<sizeof(T)>(&sp));
401 litArray->literals.push_back(lit);
402 }
403 } else {
404 for (size_t i = 0; i < len; i++) {
405 auto strId = panda_file::helpers::Read<sizeof(T)>(&sp);
406 pandasm::LiteralArray::Literal lit;
407 lit.tag = tag;
408 lit.value = StringDataToString(file_->GetStringData(panda_file::File::EntityId(strId)));
409 litArray->literals.push_back(lit);
410 }
411 }
412 }
413
FillLiteralData(pandasm::LiteralArray * litArray,const panda_file::LiteralDataAccessor::LiteralValue & value,const panda_file::LiteralTag & tag) const414 void Disassembler::FillLiteralData(pandasm::LiteralArray *litArray,
415 const panda_file::LiteralDataAccessor::LiteralValue &value,
416 const panda_file::LiteralTag &tag) const
417 {
418 pandasm::LiteralArray::Literal lit;
419 lit.tag = tag;
420 switch (tag) {
421 case panda_file::LiteralTag::BOOL: {
422 lit.value = std::get<bool>(value);
423 break;
424 }
425 case panda_file::LiteralTag::ACCESSOR:
426 case panda_file::LiteralTag::NULLVALUE: {
427 lit.value = std::get<uint8_t>(value);
428 break;
429 }
430 case panda_file::LiteralTag::METHODAFFILIATE: {
431 lit.value = std::get<uint16_t>(value);
432 break;
433 }
434 case panda_file::LiteralTag::INTEGER: {
435 lit.value = std::get<uint32_t>(value);
436 break;
437 }
438 case panda_file::LiteralTag::BIGINT: {
439 lit.value = std::get<uint64_t>(value);
440 break;
441 }
442 case panda_file::LiteralTag::FLOAT: {
443 lit.value = std::get<float>(value);
444 break;
445 }
446 case panda_file::LiteralTag::DOUBLE: {
447 lit.value = std::get<double>(value);
448 break;
449 }
450 case panda_file::LiteralTag::STRING:
451 case panda_file::LiteralTag::METHOD:
452 case panda_file::LiteralTag::GENERATORMETHOD: {
453 auto strData = file_->GetStringData(panda_file::File::EntityId(std::get<uint32_t>(value)));
454 lit.value = StringDataToString(strData);
455 break;
456 }
457 case panda_file::LiteralTag::TAGVALUE: {
458 return;
459 }
460 default: {
461 LOG(ERROR, DISASSEMBLER) << "Unsupported literal with tag 0x" << std::hex << static_cast<uint32_t>(tag);
462 UNREACHABLE();
463 }
464 }
465 litArray->literals.push_back(lit);
466 }
467
GetLiteralArray(pandasm::LiteralArray * litArray,const size_t index)468 void Disassembler::GetLiteralArray(pandasm::LiteralArray *litArray, const size_t index)
469 {
470 LOG(DEBUG, DISASSEMBLER) << "\n[getting literal array]\nindex: " << index;
471
472 panda_file::LiteralDataAccessor litArrayAccessor(*file_, file_->GetLiteralArraysId());
473
474 // clang-format off
475 litArrayAccessor.EnumerateLiteralVals(index,
476 [this, litArray](const panda_file::LiteralDataAccessor::LiteralValue &value,
477 const panda_file::LiteralTag &tag) {
478 switch (tag) {
479 case panda_file::LiteralTag::ARRAY_U1: {
480 FillLiteralArrayData<bool>(litArray, tag, value);
481 break;
482 }
483 case panda_file::LiteralTag::ARRAY_I8:
484 case panda_file::LiteralTag::ARRAY_U8: {
485 FillLiteralArrayData<uint8_t>(litArray, tag, value);
486 break;
487 }
488 case panda_file::LiteralTag::ARRAY_I16:
489 case panda_file::LiteralTag::ARRAY_U16: {
490 FillLiteralArrayData<uint16_t>(litArray, tag, value);
491 break;
492 }
493 case panda_file::LiteralTag::ARRAY_I32:
494 case panda_file::LiteralTag::ARRAY_U32: {
495 FillLiteralArrayData<uint32_t>(litArray, tag, value);
496 break;
497 }
498 case panda_file::LiteralTag::ARRAY_I64:
499 case panda_file::LiteralTag::ARRAY_U64: {
500 FillLiteralArrayData<uint64_t>(litArray, tag, value);
501 break;
502 }
503 case panda_file::LiteralTag::ARRAY_F32: {
504 FillLiteralArrayData<float>(litArray, tag, value);
505 break;
506 }
507 case panda_file::LiteralTag::ARRAY_F64: {
508 FillLiteralArrayData<double>(litArray, tag, value);
509 break;
510 }
511 case panda_file::LiteralTag::ARRAY_STRING: {
512 FillLiteralArrayData<uint32_t>(litArray, tag, value);
513 break;
514 }
515 default: {
516 FillLiteralData(litArray, value, tag);
517 }
518 }
519 });
520 // clang-format on
521 }
522
GetLiteralArrays()523 void Disassembler::GetLiteralArrays()
524 {
525 const auto litArraysId = file_->GetLiteralArraysId();
526
527 LOG(DEBUG, DISASSEMBLER) << "\n[getting literal arrays]\nid: " << litArraysId << " (0x" << std::hex << litArraysId
528 << ")";
529
530 panda_file::LiteralDataAccessor litArrayAccessor(*file_, litArraysId);
531 size_t numLitarrays = litArrayAccessor.GetLiteralNum();
532 for (size_t index = 0; index < numLitarrays; index++) {
533 ark::pandasm::LiteralArray litAr;
534 GetLiteralArray(&litAr, index);
535 prog_.literalarrayTable.emplace(std::to_string(index), litAr);
536 }
537 }
538
GetRecords()539 void Disassembler::GetRecords()
540 {
541 LOG(DEBUG, DISASSEMBLER) << "\n[getting records]\n";
542
543 const auto classIdx = file_->GetClasses();
544
545 for (size_t i = 0; i < classIdx.size(); i++) {
546 uint32_t classId = classIdx[i];
547 auto classOff = file_->GetHeader()->classIdxOff + sizeof(uint32_t) * i;
548
549 if (classId > file_->GetHeader()->fileSize) {
550 LOG(ERROR, DISASSEMBLER) << "> error encountered in record at " << classOff << " (0x" << std::hex
551 << classOff << "). binary file corrupted. record offset (0x" << classId
552 << ") out of bounds (0x" << file_->GetHeader()->fileSize << ")!";
553 break;
554 }
555
556 const panda_file::File::EntityId recordId {classId};
557 auto language = GetRecordLanguage(recordId);
558 if (language != fileLanguage_) {
559 if (fileLanguage_ == panda_file::SourceLang::PANDA_ASSEMBLY) {
560 fileLanguage_ = language;
561 } else if (language != panda_file::SourceLang::PANDA_ASSEMBLY) {
562 LOG(ERROR, DISASSEMBLER) << "> possible error encountered in record at" << classOff << " (0x"
563 << std::hex << classOff << "). record's language ("
564 << panda_file::LanguageToString(language)
565 << ") differs from file's language ("
566 << panda_file::LanguageToString(fileLanguage_) << ")!";
567 }
568 }
569
570 pandasm::Record record("", fileLanguage_);
571 GetRecord(record, recordId);
572
573 if (prog_.recordTable.find(record.name) == prog_.recordTable.end()) {
574 recordNameToId_.emplace(record.name, recordId);
575 prog_.recordTable.emplace(record.name, std::move(record));
576 }
577 }
578 }
579
GetField(pandasm::Field & field,const panda_file::FieldDataAccessor & fieldAccessor)580 void Disassembler::GetField(pandasm::Field &field, const panda_file::FieldDataAccessor &fieldAccessor)
581 {
582 panda_file::File::EntityId fieldNameId = fieldAccessor.GetNameId();
583 field.name = StringDataToString(file_->GetStringData(fieldNameId));
584
585 uint32_t fieldType = fieldAccessor.GetType();
586 field.type = FieldTypeToPandasmType(fieldType);
587
588 GetMetaData(&field, fieldAccessor.GetFieldId());
589 }
590
GetFields(pandasm::Record & record,const panda_file::File::EntityId & recordId)591 void Disassembler::GetFields(pandasm::Record &record, const panda_file::File::EntityId &recordId)
592 {
593 panda_file::ClassDataAccessor classAccessor {*file_, recordId};
594
595 classAccessor.EnumerateFields([&](panda_file::FieldDataAccessor &fieldAccessor) -> void {
596 pandasm::Field field(fileLanguage_);
597
598 GetField(field, fieldAccessor);
599
600 record.fieldList.push_back(std::move(field));
601 });
602 }
603
AddExternalFieldsToRecords()604 void Disassembler::AddExternalFieldsToRecords()
605 {
606 for (auto &[recordName, record] : prog_.recordTable) {
607 auto iter = externalFieldTable_.find(recordName);
608 if (iter == externalFieldTable_.end() || iter->second.empty()) {
609 continue;
610 }
611 for (auto &fieldIter : iter->second) {
612 record.fieldList.push_back(std::move(fieldIter));
613 }
614 externalFieldTable_.erase(recordName);
615 }
616 }
617
AddExternalFieldsInfoToRecords()618 void Disassembler::AddExternalFieldsInfoToRecords()
619 {
620 for (auto &[recordName, recordInfo] : progInfo_.recordsInfo) {
621 auto iter = externalFieldsInfoTable_.find(recordName);
622 if (iter == externalFieldsInfoTable_.end() || iter->second.empty()) {
623 continue;
624 }
625 for (auto &info : iter->second) {
626 recordInfo.fieldsInfo.push_back(std::move(info));
627 }
628 externalFieldsInfoTable_.erase(recordName);
629 }
630 }
631
GetMethods(const panda_file::File::EntityId & recordId)632 void Disassembler::GetMethods(const panda_file::File::EntityId &recordId)
633 {
634 panda_file::ClassDataAccessor classAccessor {*file_, recordId};
635
636 classAccessor.EnumerateMethods([&](panda_file::MethodDataAccessor &methodAccessor) -> void {
637 AddMethodToTables(methodAccessor.GetMethodId());
638 });
639 }
640
GetParams(pandasm::Function * method,const panda_file::File::EntityId & protoId) const641 void Disassembler::GetParams(pandasm::Function *method, const panda_file::File::EntityId &protoId) const
642 {
643 /// frame size - 2^16 - 1
644 static const uint32_t MAX_ARG_NUM = 0xFFFF;
645
646 LOG(DEBUG, DISASSEMBLER) << "[getting params]\nproto id: " << protoId << " (0x" << std::hex << protoId << ")";
647
648 if (method == nullptr) {
649 LOG(ERROR, DISASSEMBLER) << "> nullptr recieved, but method ptr expected!";
650
651 return;
652 }
653
654 panda_file::ProtoDataAccessor protoAccessor(*file_, protoId);
655
656 if (protoAccessor.GetNumArgs() > MAX_ARG_NUM) {
657 LOG(ERROR, DISASSEMBLER) << "> error encountered at " << protoId << " (0x" << std::hex << protoId
658 << "). number of function's arguments (" << std::dec << protoAccessor.GetNumArgs()
659 << ") exceeds MAX_ARG_NUM (" << MAX_ARG_NUM << ") !";
660
661 return;
662 }
663
664 size_t refIdx = 0;
665 method->returnType = PFTypeToPandasmType(protoAccessor.GetReturnType(), protoAccessor, refIdx);
666
667 for (size_t i = 0; i < protoAccessor.GetNumArgs(); i++) {
668 auto argType = PFTypeToPandasmType(protoAccessor.GetArgType(i), protoAccessor, refIdx);
669 method->params.emplace_back(argType, fileLanguage_);
670 }
671 }
672
GetExceptions(pandasm::Function * method,panda_file::File::EntityId methodId,panda_file::File::EntityId codeId) const673 LabelTable Disassembler::GetExceptions(pandasm::Function *method, panda_file::File::EntityId methodId,
674 panda_file::File::EntityId codeId) const
675 {
676 LOG(DEBUG, DISASSEMBLER) << "[getting exceptions]\ncode id: " << codeId << " (0x" << std::hex << codeId << ")";
677
678 if (method == nullptr) {
679 LOG(ERROR, DISASSEMBLER) << "> nullptr recieved, but method ptr expected!\n";
680 return LabelTable {};
681 }
682
683 panda_file::CodeDataAccessor codeAccessor(*file_, codeId);
684
685 const auto bcIns = BytecodeInstruction(codeAccessor.GetInstructions());
686 const auto bcInsLast = bcIns.JumpTo(codeAccessor.GetCodeSize());
687
688 size_t tryIdx = 0;
689 LabelTable labelTable {};
690 codeAccessor.EnumerateTryBlocks([&](panda_file::CodeDataAccessor::TryBlock &tryBlock) {
691 pandasm::Function::CatchBlock catchBlockPa {};
692 if (!LocateTryBlock(bcIns, bcInsLast, tryBlock, &catchBlockPa, &labelTable, tryIdx)) {
693 return false;
694 }
695 size_t catchIdx = 0;
696 tryBlock.EnumerateCatchBlocks([&](panda_file::CodeDataAccessor::CatchBlock &catchBlock) {
697 auto classIdx = catchBlock.GetTypeIdx();
698 if (classIdx == panda_file::INVALID_INDEX) {
699 catchBlockPa.exceptionRecord = "";
700 } else {
701 const auto classId = file_->ResolveClassIndex(methodId, classIdx);
702 catchBlockPa.exceptionRecord = GetFullRecordName(classId);
703 }
704 if (!LocateCatchBlock(bcIns, bcInsLast, catchBlock, &catchBlockPa, &labelTable, tryIdx, catchIdx)) {
705 return false;
706 }
707
708 method->catchBlocks.push_back(catchBlockPa);
709 catchBlockPa.catchBeginLabel = "";
710 catchBlockPa.catchEndLabel = "";
711 catchIdx++;
712
713 return true;
714 });
715 tryIdx++;
716
717 return true;
718 });
719
720 return labelTable;
721 }
722
GetBytecodeInstructionNumber(BytecodeInstruction bcInsFirst,BytecodeInstruction bcInsCur)723 static size_t GetBytecodeInstructionNumber(BytecodeInstruction bcInsFirst, BytecodeInstruction bcInsCur)
724 {
725 size_t count = 0;
726
727 while (bcInsFirst.GetAddress() != bcInsCur.GetAddress()) {
728 count++;
729 bcInsFirst = bcInsFirst.GetNext();
730 if (bcInsFirst.GetAddress() > bcInsCur.GetAddress()) {
731 return std::numeric_limits<size_t>::max();
732 }
733 }
734
735 return count;
736 }
737
LocateTryBlock(const BytecodeInstruction & bcIns,const BytecodeInstruction & bcInsLast,const panda_file::CodeDataAccessor::TryBlock & tryBlock,pandasm::Function::CatchBlock * catchBlockPa,LabelTable * labelTable,size_t tryIdx) const738 bool Disassembler::LocateTryBlock(const BytecodeInstruction &bcIns, const BytecodeInstruction &bcInsLast,
739 const panda_file::CodeDataAccessor::TryBlock &tryBlock,
740 pandasm::Function::CatchBlock *catchBlockPa, LabelTable *labelTable,
741 size_t tryIdx) const
742 {
743 const auto tryBeginBcIns = bcIns.JumpTo(tryBlock.GetStartPc());
744 const auto tryEndBcIns = bcIns.JumpTo(tryBlock.GetStartPc() + tryBlock.GetLength());
745
746 const size_t tryBeginIdx = GetBytecodeInstructionNumber(bcIns, tryBeginBcIns);
747 const size_t tryEndIdx = GetBytecodeInstructionNumber(bcIns, tryEndBcIns);
748
749 const bool tryBeginOffsetInRange = bcInsLast.GetAddress() > tryBeginBcIns.GetAddress();
750 const bool tryEndOffsetInRange = bcInsLast.GetAddress() >= tryEndBcIns.GetAddress();
751 const bool tryBeginOffsetValid = tryBeginIdx != std::numeric_limits<size_t>::max();
752 const bool tryEndOffsetValid = tryEndIdx != std::numeric_limits<size_t>::max();
753
754 if (!tryBeginOffsetInRange || !tryBeginOffsetValid) {
755 LOG(ERROR, DISASSEMBLER) << "> invalid try block begin offset! address is: 0x" << std::hex
756 << tryBeginBcIns.GetAddress();
757 return false;
758 }
759
760 auto itBegin = labelTable->find(tryBeginIdx);
761 if (itBegin == labelTable->end()) {
762 std::stringstream ss {};
763 ss << "try_begin_label_" << tryIdx;
764 catchBlockPa->tryBeginLabel = ss.str();
765 labelTable->insert(std::pair<size_t, std::string>(tryBeginIdx, ss.str()));
766 } else {
767 catchBlockPa->tryBeginLabel = itBegin->second;
768 }
769
770 if (!tryEndOffsetInRange || !tryEndOffsetValid) {
771 LOG(ERROR, DISASSEMBLER) << "> invalid try block end offset! address is: 0x" << std::hex
772 << tryEndBcIns.GetAddress();
773 return false;
774 }
775
776 auto itEnd = labelTable->find(tryEndIdx);
777 if (itEnd == labelTable->end()) {
778 std::stringstream ss {};
779 ss << "try_end_label_" << tryIdx;
780 catchBlockPa->tryEndLabel = ss.str();
781 labelTable->insert(std::pair<size_t, std::string>(tryEndIdx, ss.str()));
782 } else {
783 catchBlockPa->tryEndLabel = itEnd->second;
784 }
785
786 return true;
787 }
788
LocateCatchBlock(const BytecodeInstruction & bcIns,const BytecodeInstruction & bcInsLast,const panda_file::CodeDataAccessor::CatchBlock & catchBlock,pandasm::Function::CatchBlock * catchBlockPa,LabelTable * labelTable,size_t tryIdx,size_t catchIdx) const789 bool Disassembler::LocateCatchBlock(const BytecodeInstruction &bcIns, const BytecodeInstruction &bcInsLast,
790 const panda_file::CodeDataAccessor::CatchBlock &catchBlock,
791 pandasm::Function::CatchBlock *catchBlockPa, LabelTable *labelTable, size_t tryIdx,
792 size_t catchIdx) const
793 {
794 const auto handlerBeginOffset = catchBlock.GetHandlerPc();
795 const auto handlerEndOffset = handlerBeginOffset + catchBlock.GetCodeSize();
796
797 const auto handlerBeginBcIns = bcIns.JumpTo(handlerBeginOffset);
798 const auto handlerEndBcIns = bcIns.JumpTo(handlerEndOffset);
799
800 const size_t handlerBeginIdx = GetBytecodeInstructionNumber(bcIns, handlerBeginBcIns);
801 const size_t handlerEndIdx = GetBytecodeInstructionNumber(bcIns, handlerEndBcIns);
802
803 const bool handlerBeginOffsetInRange = bcInsLast.GetAddress() > handlerBeginBcIns.GetAddress();
804 const bool handlerEndOffsetInRange = bcInsLast.GetAddress() > handlerEndBcIns.GetAddress();
805 const bool handlerEndPresent = catchBlock.GetCodeSize() != 0;
806 const bool handlerBeginOffsetValid = handlerBeginIdx != std::numeric_limits<size_t>::max();
807 const bool handlerEndOffsetValid = handlerEndIdx != std::numeric_limits<size_t>::max();
808
809 if (!handlerBeginOffsetInRange || !handlerBeginOffsetValid) {
810 LOG(ERROR, DISASSEMBLER) << "> invalid catch block begin offset! address is: 0x" << std::hex
811 << handlerBeginBcIns.GetAddress();
812 return false;
813 }
814
815 auto itBegin = labelTable->find(handlerBeginIdx);
816 if (itBegin == labelTable->end()) {
817 std::stringstream ss {};
818 ss << "handler_begin_label_" << tryIdx << "_" << catchIdx;
819 catchBlockPa->catchBeginLabel = ss.str();
820 labelTable->insert(std::pair<size_t, std::string>(handlerBeginIdx, ss.str()));
821 } else {
822 catchBlockPa->catchBeginLabel = itBegin->second;
823 }
824
825 if (!handlerEndOffsetInRange || !handlerEndOffsetValid) {
826 LOG(ERROR, DISASSEMBLER) << "> invalid catch block end offset! address is: 0x" << std::hex
827 << handlerEndBcIns.GetAddress();
828 return false;
829 }
830
831 if (handlerEndPresent) {
832 auto itEnd = labelTable->find(handlerEndIdx);
833 if (itEnd == labelTable->end()) {
834 std::stringstream ss {};
835 ss << "handler_end_label_" << tryIdx << "_" << catchIdx;
836 catchBlockPa->catchEndLabel = ss.str();
837 labelTable->insert(std::pair<size_t, std::string>(handlerEndIdx, ss.str()));
838 } else {
839 catchBlockPa->catchEndLabel = itEnd->second;
840 }
841 }
842
843 return true;
844 }
845
846 template <typename T>
SetEntityAttribute(T * entity,const std::function<bool ()> & shouldSet,std::string_view attribute)847 static void SetEntityAttribute(T *entity, const std::function<bool()> &shouldSet, std::string_view attribute)
848 {
849 if (shouldSet()) {
850 auto err = entity->metadata->SetAttribute(attribute);
851 if (err.has_value()) {
852 LOG(ERROR, DISASSEMBLER) << err.value().GetMessage();
853 }
854 }
855 }
856
857 template <typename T>
SetEntityAttributeValue(T * entity,const std::function<bool ()> & shouldSet,std::string_view attribute,const char * value)858 static void SetEntityAttributeValue(T *entity, const std::function<bool()> &shouldSet, std::string_view attribute,
859 const char *value)
860 {
861 if (shouldSet()) {
862 auto err = entity->metadata->SetAttributeValue(attribute, value);
863 if (err.has_value()) {
864 LOG(ERROR, DISASSEMBLER) << err.value().GetMessage();
865 }
866 }
867 }
868
GetMetaData(pandasm::Function * method,const panda_file::File::EntityId & methodId) const869 void Disassembler::GetMetaData(pandasm::Function *method, const panda_file::File::EntityId &methodId) const
870 {
871 LOG(DEBUG, DISASSEMBLER) << "[getting metadata]\nmethod id: " << methodId << " (0x" << std::hex << methodId << ")";
872
873 if (method == nullptr) {
874 LOG(ERROR, DISASSEMBLER) << "> nullptr recieved, but method ptr expected!";
875
876 return;
877 }
878
879 panda_file::MethodDataAccessor methodAccessor(*file_, methodId);
880
881 const auto methodNameRaw = StringDataToString(file_->GetStringData(methodAccessor.GetNameId()));
882
883 if (!methodAccessor.IsStatic()) {
884 const auto className = StringDataToString(file_->GetStringData(methodAccessor.GetClassId()));
885 auto thisType = pandasm::Type::FromDescriptor(className);
886
887 LOG(DEBUG, DISASSEMBLER) << "method (raw: \'" << methodNameRaw
888 << "\') is not static. emplacing self-argument of type " << thisType.GetName();
889
890 method->params.insert(method->params.begin(), pandasm::Function::Parameter(thisType, fileLanguage_));
891 }
892 SetEntityAttribute(
893 method, [&methodAccessor]() { return methodAccessor.IsStatic(); }, "static");
894
895 SetEntityAttribute(
896 method, [this, &methodAccessor]() { return file_->IsExternal(methodAccessor.GetMethodId()); }, "external");
897
898 SetEntityAttribute(
899 method, [&methodAccessor]() { return methodAccessor.IsNative(); }, "native");
900
901 SetEntityAttribute(
902 method, [&methodAccessor]() { return methodAccessor.IsAbstract(); }, "noimpl");
903
904 SetEntityAttribute(
905 method, [&methodAccessor]() { return methodAccessor.IsVarArgs(); }, "varargs");
906
907 SetEntityAttributeValue(
908 method, [&methodAccessor]() { return methodAccessor.IsPublic(); }, "access.function", "public");
909
910 SetEntityAttributeValue(
911 method, [&methodAccessor]() { return methodAccessor.IsProtected(); }, "access.function", "protected");
912
913 SetEntityAttributeValue(
914 method, [&methodAccessor]() { return methodAccessor.IsPrivate(); }, "access.function", "private");
915
916 SetEntityAttribute(
917 method, [&methodAccessor]() { return methodAccessor.IsFinal(); }, "final");
918
919 std::string ctorName = ark::panda_file::GetCtorName(fileLanguage_);
920 std::string cctorName = ark::panda_file::GetCctorName(fileLanguage_);
921
922 const bool isCtor = (methodNameRaw == ctorName);
923 const bool isCctor = (methodNameRaw == cctorName);
924
925 if (isCtor) {
926 method->metadata->SetAttribute("ctor");
927 method->name.replace(method->name.find(ctorName), ctorName.length(), "_ctor_");
928 } else if (isCctor) {
929 method->metadata->SetAttribute("cctor");
930 method->name.replace(method->name.find(cctorName), cctorName.length(), "_cctor_");
931 }
932 }
933
GetMetaData(pandasm::Record * record,const panda_file::File::EntityId & recordId) const934 void Disassembler::GetMetaData(pandasm::Record *record, const panda_file::File::EntityId &recordId) const
935 {
936 LOG(DEBUG, DISASSEMBLER) << "[getting metadata]\nrecord id: " << recordId << " (0x" << std::hex << recordId << ")";
937
938 if (record == nullptr) {
939 LOG(ERROR, DISASSEMBLER) << "> nullptr recieved, but record ptr expected!";
940
941 return;
942 }
943
944 SetEntityAttribute(
945 record, [this, recordId]() { return file_->IsExternal(recordId); }, "external");
946
947 auto external = file_->IsExternal(recordId);
948 if (!external) {
949 auto cda = panda_file::ClassDataAccessor {*file_, recordId};
950 SetEntityAttributeValue(
951 record, [&cda]() { return cda.IsPublic(); }, "access.record", "public");
952
953 SetEntityAttributeValue(
954 record, [&cda]() { return cda.IsProtected(); }, "access.record", "protected");
955
956 SetEntityAttributeValue(
957 record, [&cda]() { return cda.IsPrivate(); }, "access.record", "private");
958
959 SetEntityAttribute(
960 record, [&cda]() { return cda.IsFinal(); }, "final");
961 }
962 }
963
GetMetaData(pandasm::Field * field,const panda_file::File::EntityId & fieldId) const964 void Disassembler::GetMetaData(pandasm::Field *field, const panda_file::File::EntityId &fieldId) const
965 {
966 LOG(DEBUG, DISASSEMBLER) << "[getting metadata]\nfield id: " << fieldId << " (0x" << std::hex << fieldId << ")";
967
968 if (field == nullptr) {
969 LOG(ERROR, DISASSEMBLER) << "> nullptr recieved, but method ptr expected!";
970
971 return;
972 }
973
974 panda_file::FieldDataAccessor fieldAccessor(*file_, fieldId);
975
976 SetEntityAttribute(
977 field, [&fieldAccessor]() { return fieldAccessor.IsExternal(); }, "external");
978
979 SetEntityAttribute(
980 field, [&fieldAccessor]() { return fieldAccessor.IsStatic(); }, "static");
981
982 SetEntityAttributeValue(
983 field, [&fieldAccessor]() { return fieldAccessor.IsPublic(); }, "access.field", "public");
984
985 SetEntityAttributeValue(
986 field, [&fieldAccessor]() { return fieldAccessor.IsProtected(); }, "access.field", "protected");
987
988 SetEntityAttributeValue(
989 field, [&fieldAccessor]() { return fieldAccessor.IsPrivate(); }, "access.field", "private");
990
991 SetEntityAttribute(
992 field, [&fieldAccessor]() { return fieldAccessor.IsFinal(); }, "final");
993 }
994
AnnotationTagToString(const char tag) const995 std::string Disassembler::AnnotationTagToString(const char tag) const
996 {
997 static const std::unordered_map<char, std::string> TAG_TO_STRING = {{'1', "u1"},
998 {'2', "i8"},
999 {'3', "u8"},
1000 {'4', "i16"},
1001 {'5', "u16"},
1002 {'6', "i32"},
1003 {'7', "u32"},
1004 {'8', "i64"},
1005 {'9', "u64"},
1006 {'A', "f32"},
1007 {'B', "f64"},
1008 {'C', "string"},
1009 {'D', "record"},
1010 {'E', "method"},
1011 {'F', "enum"},
1012 {'G', "annotation"},
1013 {'J', "method_handle"},
1014 {'H', "array"},
1015 {'K', "u1[]"},
1016 {'L', "i8[]"},
1017 {'M', "u8[]"},
1018 {'N', "i16[]"},
1019 {'O', "u16[]"},
1020 {'P', "i32[]"},
1021 {'Q', "u32[]"},
1022 {'R', "i64[]"},
1023 {'S', "u64[]"},
1024 {'T', "f32[]"},
1025 {'U', "f64[]"},
1026 {'V', "string[]"},
1027 {'W', "record[]"},
1028 {'X', "method[]"},
1029 {'Y', "enum[]"},
1030 {'Z', "annotation[]"},
1031 {'@', "method_handle[]"},
1032 {'*', "nullptr_string"}};
1033
1034 return TAG_TO_STRING.at(tag);
1035 }
1036
ScalarValueToString(const panda_file::ScalarValue & value,const std::string & type)1037 std::string Disassembler::ScalarValueToString(const panda_file::ScalarValue &value, const std::string &type)
1038 {
1039 std::stringstream ss;
1040
1041 if (type == "i8") {
1042 auto res = value.Get<int8_t>();
1043 ss << static_cast<int>(res);
1044 } else if (type == "u1" || type == "u8") {
1045 auto res = value.Get<uint8_t>();
1046 ss << static_cast<unsigned int>(res);
1047 } else if (type == "i16") {
1048 ss << value.Get<int16_t>();
1049 } else if (type == "u16") {
1050 ss << value.Get<uint16_t>();
1051 } else if (type == "i32") {
1052 ss << value.Get<int32_t>();
1053 } else if (type == "u32") {
1054 ss << value.Get<uint32_t>();
1055 } else if (type == "i64") {
1056 ss << value.Get<int64_t>();
1057 } else if (type == "u64") {
1058 ss << value.Get<uint64_t>();
1059 } else if (type == "f32") {
1060 ss << value.Get<float>();
1061 } else if (type == "f64") {
1062 ss << value.Get<double>();
1063 } else if (type == "string") {
1064 const auto id = value.Get<panda_file::File::EntityId>();
1065 ss << "\"" << StringDataToString(file_->GetStringData(id)) << "\"";
1066 } else if (type == "record") {
1067 const auto id = value.Get<panda_file::File::EntityId>();
1068 ss << GetFullRecordName(id);
1069 } else if (type == "method") {
1070 const auto id = value.Get<panda_file::File::EntityId>();
1071 AddMethodToTables(id);
1072 ss << GetMethodSignature(id);
1073 } else if (type == "enum") {
1074 const auto id = value.Get<panda_file::File::EntityId>();
1075 panda_file::FieldDataAccessor fieldAccessor(*file_, id);
1076 ss << GetFullRecordName(fieldAccessor.GetClassId()) << "."
1077 << StringDataToString(file_->GetStringData(fieldAccessor.GetNameId()));
1078 } else if (type == "annotation") {
1079 const auto id = value.Get<panda_file::File::EntityId>();
1080 ss << "id_" << id;
1081 } else if (type == "void") {
1082 return std::string();
1083 } else if (type == "method_handle") {
1084 } else if (type == "nullptr_string") {
1085 ss << static_cast<uint32_t>(0);
1086 }
1087
1088 return ss.str();
1089 }
1090
ArrayValueToString(const panda_file::ArrayValue & value,const std::string & type,const size_t idx)1091 std::string Disassembler::ArrayValueToString(const panda_file::ArrayValue &value, const std::string &type,
1092 const size_t idx)
1093 {
1094 std::stringstream ss;
1095
1096 if (type == "i8") {
1097 auto res = value.Get<int8_t>(idx);
1098 ss << static_cast<int>(res);
1099 } else if (type == "u1" || type == "u8") {
1100 auto res = value.Get<uint8_t>(idx);
1101 ss << static_cast<unsigned int>(res);
1102 } else if (type == "i16") {
1103 ss << value.Get<int16_t>(idx);
1104 } else if (type == "u16") {
1105 ss << value.Get<uint16_t>(idx);
1106 } else if (type == "i32") {
1107 ss << value.Get<int32_t>(idx);
1108 } else if (type == "u32") {
1109 ss << value.Get<uint32_t>(idx);
1110 } else if (type == "i64") {
1111 ss << value.Get<int64_t>(idx);
1112 } else if (type == "u64") {
1113 ss << value.Get<uint64_t>(idx);
1114 } else if (type == "f32") {
1115 ss << value.Get<float>(idx);
1116 } else if (type == "f64") {
1117 ss << value.Get<double>(idx);
1118 } else if (type == "string") {
1119 const auto id = value.Get<panda_file::File::EntityId>(idx);
1120 ss << '\"' << StringDataToString(file_->GetStringData(id)) << '\"';
1121 } else if (type == "record") {
1122 const auto id = value.Get<panda_file::File::EntityId>(idx);
1123 ss << GetFullRecordName(id);
1124 } else if (type == "method") {
1125 const auto id = value.Get<panda_file::File::EntityId>(idx);
1126 AddMethodToTables(id);
1127 ss << GetMethodSignature(id);
1128 } else if (type == "enum") {
1129 const auto id = value.Get<panda_file::File::EntityId>(idx);
1130 panda_file::FieldDataAccessor fieldAccessor(*file_, id);
1131 ss << GetFullRecordName(fieldAccessor.GetClassId()) << "."
1132 << StringDataToString(file_->GetStringData(fieldAccessor.GetNameId()));
1133 } else if (type == "annotation") {
1134 const auto id = value.Get<panda_file::File::EntityId>(idx);
1135 ss << "id_" << id;
1136 } else if (type == "method_handle") {
1137 } else if (type == "nullptr_string") {
1138 }
1139
1140 return ss.str();
1141 }
1142
GetFullMethodName(const panda_file::File::EntityId & methodId) const1143 std::string Disassembler::GetFullMethodName(const panda_file::File::EntityId &methodId) const
1144 {
1145 ark::panda_file::MethodDataAccessor methodAccessor(*file_, methodId);
1146
1147 const auto methodNameRaw = StringDataToString(file_->GetStringData(methodAccessor.GetNameId()));
1148
1149 std::string className = GetFullRecordName(methodAccessor.GetClassId());
1150 if (IsSystemType(className)) {
1151 className = "";
1152 } else {
1153 className += ".";
1154 }
1155
1156 return className + methodNameRaw;
1157 }
1158
GetMethodSignature(const panda_file::File::EntityId & methodId) const1159 std::string Disassembler::GetMethodSignature(const panda_file::File::EntityId &methodId) const
1160 {
1161 ark::panda_file::MethodDataAccessor methodAccessor(*file_, methodId);
1162
1163 pandasm::Function method(GetFullMethodName(methodId), fileLanguage_);
1164 GetParams(&method, methodAccessor.GetProtoId());
1165 GetMetaData(&method, methodId);
1166
1167 return pandasm::GetFunctionSignatureFromName(method.name, method.params);
1168 }
1169
GetFullRecordName(const panda_file::File::EntityId & classId) const1170 std::string Disassembler::GetFullRecordName(const panda_file::File::EntityId &classId) const
1171 {
1172 std::string name = StringDataToString(file_->GetStringData(classId));
1173
1174 auto type = pandasm::Type::FromDescriptor(name);
1175 type = pandasm::Type(type.GetComponentName(), type.GetRank());
1176
1177 return type.GetPandasmName();
1178 }
1179
// Minimum field width (zero-padded) used when printing hexadecimal offsets and sizes in the
// informational comments produced by the Get*Info helpers.
static constexpr size_t DEFAULT_OFFSET_WIDTH = 4;
1181
GetFieldInfo(const panda_file::FieldDataAccessor & fieldAccessor,std::stringstream & ss)1182 static void GetFieldInfo(const panda_file::FieldDataAccessor &fieldAccessor, std::stringstream &ss)
1183 {
1184 ss << "offset: 0x" << std::setfill('0') << std::setw(DEFAULT_OFFSET_WIDTH) << std::hex << fieldAccessor.GetFieldId()
1185 << ", type: 0x" << fieldAccessor.GetType();
1186 }
1187
GetFieldInfo(const panda_file::FieldDataAccessor & fieldAccessor)1188 static std::string GetFieldInfo(const panda_file::FieldDataAccessor &fieldAccessor)
1189 {
1190 std::stringstream ss;
1191 ss << "offset: 0x" << std::setfill('0') << std::setw(DEFAULT_OFFSET_WIDTH) << std::hex << fieldAccessor.GetFieldId()
1192 << ", type: 0x" << fieldAccessor.GetType();
1193 return ss.str();
1194 }
1195
GetRecordInfo(const panda_file::File::EntityId & recordId,RecordInfo * recordInfo) const1196 void Disassembler::GetRecordInfo(const panda_file::File::EntityId &recordId, RecordInfo *recordInfo) const
1197 {
1198 if (file_->IsExternal(recordId)) {
1199 return;
1200 }
1201
1202 panda_file::ClassDataAccessor classAccessor {*file_, recordId};
1203 std::stringstream ss;
1204
1205 ss << "offset: 0x" << std::setfill('0') << std::setw(DEFAULT_OFFSET_WIDTH) << std::hex << classAccessor.GetClassId()
1206 << ", size: 0x" << std::setfill('0') << std::setw(DEFAULT_OFFSET_WIDTH) << classAccessor.GetSize() << " ("
1207 << std::dec << classAccessor.GetSize() << ")";
1208
1209 recordInfo->recordInfo = ss.str();
1210 ss.str(std::string());
1211
1212 classAccessor.EnumerateFields([&](panda_file::FieldDataAccessor &fieldAccessor) -> void {
1213 GetFieldInfo(fieldAccessor, ss);
1214
1215 recordInfo->fieldsInfo.push_back(ss.str());
1216
1217 ss.str(std::string());
1218 });
1219 }
1220
GetMethodInfo(const panda_file::File::EntityId & methodId,MethodInfo * methodInfo) const1221 void Disassembler::GetMethodInfo(const panda_file::File::EntityId &methodId, MethodInfo *methodInfo) const
1222 {
1223 panda_file::MethodDataAccessor methodAccessor {*file_, methodId};
1224 std::stringstream ss;
1225
1226 ss << "offset: 0x" << std::setfill('0') << std::setw(DEFAULT_OFFSET_WIDTH) << std::hex
1227 << methodAccessor.GetMethodId();
1228
1229 if (methodAccessor.GetCodeId().has_value()) {
1230 ss << ", code offset: 0x" << std::setfill('0') << std::setw(DEFAULT_OFFSET_WIDTH) << std::hex
1231 << methodAccessor.GetCodeId().value();
1232
1233 GetInsInfo(methodAccessor, methodAccessor.GetCodeId().value(), methodInfo);
1234 } else {
1235 ss << ", <no code>";
1236 }
1237
1238 auto profileSize = methodAccessor.GetProfileSize();
1239 if (profileSize) {
1240 ss << ", profile size: " << profileSize.value();
1241 }
1242
1243 methodInfo->methodInfo = ss.str();
1244
1245 if (methodAccessor.GetCodeId()) {
1246 ASSERT(debugInfoExtractor_ != nullptr);
1247 methodInfo->lineNumberTable = debugInfoExtractor_->GetLineNumberTable(methodId);
1248 methodInfo->localVariableTable = debugInfoExtractor_->GetLocalVariableTable(methodId);
1249
1250 // Add information about parameters into the table
1251 panda_file::CodeDataAccessor codeda(*file_, methodAccessor.GetCodeId().value());
1252 auto argIdx = static_cast<int32_t>(codeda.GetNumVregs());
1253 uint32_t codeSize = codeda.GetCodeSize();
1254 for (const auto &info : debugInfoExtractor_->GetParameterInfo(methodId)) {
1255 panda_file::LocalVariableInfo argInfo {info.name, info.signature, "", argIdx++, 0, codeSize};
1256 methodInfo->localVariableTable.emplace_back(argInfo);
1257 }
1258 }
1259 }
1260
Serialize(const std::string & name,const pandasm::LiteralArray & litArray,std::ostream & os) const1261 void Disassembler::Serialize(const std::string &name, const pandasm::LiteralArray &litArray, std::ostream &os) const
1262 {
1263 if (litArray.literals.empty()) {
1264 return;
1265 }
1266
1267 bool isConst = litArray.literals[0].IsArray();
1268
1269 std::stringstream specifiers {};
1270
1271 if (isConst) {
1272 specifiers << LiteralTagToString(litArray.literals[0].tag) << " " << litArray.literals.size() << " ";
1273 }
1274
1275 os << ".array array_" << name << " " << specifiers.str() << "{";
1276
1277 SerializeValues(litArray, isConst, os);
1278
1279 os << "}\n";
1280 }
1281
LiteralTagToString(const panda_file::LiteralTag & tag) const1282 std::string Disassembler::LiteralTagToString(const panda_file::LiteralTag &tag) const
1283 {
1284 switch (tag) {
1285 case panda_file::LiteralTag::BOOL:
1286 case panda_file::LiteralTag::ARRAY_U1:
1287 return "u1";
1288 case panda_file::LiteralTag::ARRAY_U8:
1289 return "u8";
1290 case panda_file::LiteralTag::ARRAY_I8:
1291 return "i8";
1292 case panda_file::LiteralTag::ARRAY_U16:
1293 return "u16";
1294 case panda_file::LiteralTag::ARRAY_I16:
1295 return "i16";
1296 case panda_file::LiteralTag::ARRAY_U32:
1297 return "u32";
1298 case panda_file::LiteralTag::INTEGER:
1299 case panda_file::LiteralTag::ARRAY_I32:
1300 return "i32";
1301 case panda_file::LiteralTag::ARRAY_U64:
1302 return "u64";
1303 case panda_file::LiteralTag::BIGINT:
1304 case panda_file::LiteralTag::ARRAY_I64:
1305 return "i64";
1306 case panda_file::LiteralTag::FLOAT:
1307 case panda_file::LiteralTag::ARRAY_F32:
1308 return "f32";
1309 case panda_file::LiteralTag::DOUBLE:
1310 case panda_file::LiteralTag::ARRAY_F64:
1311 return "f64";
1312 case panda_file::LiteralTag::STRING:
1313 case panda_file::LiteralTag::ARRAY_STRING:
1314 return pandasm::Type::FromDescriptor(panda_file::GetStringClassDescriptor(fileLanguage_)).GetPandasmName();
1315 case panda_file::LiteralTag::ACCESSOR:
1316 return "accessor";
1317 case panda_file::LiteralTag::NULLVALUE:
1318 return "nullvalue";
1319 case panda_file::LiteralTag::METHODAFFILIATE:
1320 return "method_affiliate";
1321 case panda_file::LiteralTag::METHOD:
1322 return "method";
1323 case panda_file::LiteralTag::GENERATORMETHOD:
1324 return "generator_method";
1325 default:
1326 LOG(ERROR, DISASSEMBLER) << "Unsupported literal with tag 0x" << std::hex << static_cast<uint32_t>(tag);
1327 UNREACHABLE();
1328 }
1329 }
1330
LiteralValueToString(const pandasm::LiteralArray::Literal & lit) const1331 std::string Disassembler::LiteralValueToString(const pandasm::LiteralArray::Literal &lit) const
1332 {
1333 if (lit.IsBoolValue()) {
1334 std::stringstream res {};
1335 res << (std::get<bool>(lit.value));
1336 return res.str();
1337 }
1338
1339 if (lit.IsByteValue()) {
1340 return LiteralIntegralValueToString<uint8_t>(lit);
1341 }
1342
1343 if (lit.IsShortValue()) {
1344 return LiteralIntegralValueToString<uint16_t>(lit);
1345 }
1346
1347 if (lit.IsIntegerValue()) {
1348 return LiteralIntegralValueToString<uint32_t>(lit);
1349 }
1350
1351 if (lit.IsLongValue()) {
1352 return LiteralIntegralValueToString<uint64_t>(lit);
1353 }
1354
1355 if (lit.IsDoubleValue()) {
1356 std::stringstream res {};
1357 res << std::get<double>(lit.value);
1358 return res.str();
1359 }
1360
1361 if (lit.IsFloatValue()) {
1362 std::stringstream res {};
1363 res << std::get<float>(lit.value);
1364 return res.str();
1365 }
1366
1367 if (lit.IsStringValue()) {
1368 std::stringstream res {};
1369 res << "\"" << std::get<std::string>(lit.value) << "\"";
1370 return res.str();
1371 }
1372
1373 UNREACHABLE();
1374 }
1375
SerializeValues(const pandasm::LiteralArray & litArray,const bool isConst,std::ostream & os) const1376 void Disassembler::SerializeValues(const pandasm::LiteralArray &litArray, const bool isConst, std::ostream &os) const
1377 {
1378 std::string separator = (isConst) ? (" ") : ("\n");
1379
1380 os << separator;
1381
1382 if (isConst) {
1383 for (const auto &l : litArray.literals) {
1384 os << LiteralValueToString(l) << separator;
1385 }
1386 } else {
1387 for (const auto &l : litArray.literals) {
1388 os << "\t" << LiteralTagToString(l.tag) << " " << LiteralValueToString(l) << separator;
1389 }
1390 }
1391 }
1392
Serialize(const pandasm::Record & record,std::ostream & os,bool printInformation) const1393 void Disassembler::Serialize(const pandasm::Record &record, std::ostream &os, bool printInformation) const
1394 {
1395 if (IsSystemType(record.name)) {
1396 return;
1397 }
1398
1399 os << ".record " << record.name;
1400
1401 const auto recordIter = progAnn_.recordAnnotations.find(record.name);
1402 const bool recordInTable = recordIter != progAnn_.recordAnnotations.end();
1403 if (recordInTable) {
1404 Serialize(*record.metadata, recordIter->second.annList, os);
1405 } else {
1406 Serialize(*record.metadata, {}, os);
1407 }
1408
1409 if (record.metadata->IsForeign() && record.fieldList.empty()) {
1410 os << "\n\n";
1411 return;
1412 }
1413
1414 os << " {";
1415
1416 if (printInformation && progInfo_.recordsInfo.find(record.name) != progInfo_.recordsInfo.end()) {
1417 os << " # " << progInfo_.recordsInfo.at(record.name).recordInfo << "\n";
1418 SerializeFields(record, os, true);
1419 } else {
1420 os << "\n";
1421 SerializeFields(record, os, false);
1422 }
1423
1424 os << "}\n\n";
1425 }
1426
SerializeUnionFields(const pandasm::Record & record,std::ostream & os,bool printInformation) const1427 void Disassembler::SerializeUnionFields(const pandasm::Record &record, std::ostream &os, bool printInformation) const
1428 {
1429 if (printInformation && progInfo_.recordsInfo.find(record.name) != progInfo_.recordsInfo.end()) {
1430 os << " # " << progInfo_.recordsInfo.at(record.name).recordInfo << "\n";
1431 SerializeFields(record, os, true, true);
1432 } else {
1433 SerializeFields(record, os, false, true);
1434 }
1435 os << "\n";
1436 }
1437
SerializeFields(const pandasm::Record & record,std::ostream & os,bool printInformation,bool isUnion) const1438 void Disassembler::SerializeFields(const pandasm::Record &record, std::ostream &os, bool printInformation,
1439 bool isUnion) const
1440 {
1441 constexpr size_t INFO_OFFSET = 80;
1442
1443 const auto recordIter = progAnn_.recordAnnotations.find(record.name);
1444 const bool recordInTable = recordIter != progAnn_.recordAnnotations.end();
1445
1446 const auto recInf = (printInformation) ? (progInfo_.recordsInfo.at(record.name)) : (RecordInfo {});
1447
1448 size_t fieldIdx = 0;
1449
1450 std::stringstream ss;
1451 for (const auto &f : record.fieldList) {
1452 if (isUnion) {
1453 ss << ".union_field ";
1454 } else {
1455 ss << "\t";
1456 }
1457 ss << f.type.GetPandasmName() << " " << f.name;
1458 if (!isUnion && recordInTable) {
1459 const auto fieldIter = recordIter->second.fieldAnnotations.find(f.name);
1460 if (fieldIter != recordIter->second.fieldAnnotations.end()) {
1461 Serialize(*f.metadata, fieldIter->second, ss);
1462 } else {
1463 Serialize(*f.metadata, {}, ss);
1464 }
1465 } else if (!isUnion && !recordInTable) {
1466 Serialize(*f.metadata, {}, ss);
1467 }
1468
1469 if (printInformation) {
1470 os << std::setw(INFO_OFFSET) << std::left << ss.str() << " # " << recInf.fieldsInfo.at(fieldIdx) << "\n";
1471 } else {
1472 os << ss.str() << "\n";
1473 }
1474
1475 ss.str(std::string());
1476 ss.clear();
1477
1478 fieldIdx++;
1479 }
1480 }
1481
Serialize(const pandasm::Function::CatchBlock & catchBlock,std::ostream & os) const1482 void Disassembler::Serialize(const pandasm::Function::CatchBlock &catchBlock, std::ostream &os) const
1483 {
1484 if (catchBlock.exceptionRecord.empty()) {
1485 os << ".catchall ";
1486 } else {
1487 os << ".catch " << catchBlock.exceptionRecord << ", ";
1488 }
1489
1490 os << catchBlock.tryBeginLabel << ", " << catchBlock.tryEndLabel << ", " << catchBlock.catchBeginLabel;
1491
1492 if (!catchBlock.catchEndLabel.empty()) {
1493 os << ", " << catchBlock.catchEndLabel;
1494 }
1495 }
1496
Serialize(const pandasm::ItemMetadata & meta,const AnnotationList & annList,std::ostream & os) const1497 void Disassembler::Serialize(const pandasm::ItemMetadata &meta, const AnnotationList &annList, std::ostream &os) const
1498 {
1499 auto boolAttributes = meta.GetBoolAttributes();
1500 auto attributes = meta.GetAttributes();
1501 if (boolAttributes.empty() && attributes.empty() && annList.empty()) {
1502 return;
1503 }
1504
1505 os << " <";
1506
1507 size_t size = boolAttributes.size();
1508 size_t idx = 0;
1509 for (const auto &attr : boolAttributes) {
1510 os << attr;
1511 ++idx;
1512
1513 if (!attributes.empty() || !annList.empty() || idx < size) {
1514 os << ", ";
1515 }
1516 }
1517
1518 size = attributes.size();
1519 idx = 0;
1520 for (const auto &[key, values] : attributes) {
1521 for (size_t i = 0; i < values.size(); i++) {
1522 os << key << "=" << values[i];
1523
1524 if (i < values.size() - 1) {
1525 os << ", ";
1526 }
1527 }
1528
1529 ++idx;
1530
1531 if (!annList.empty() || idx < size) {
1532 os << ", ";
1533 }
1534 }
1535
1536 size = annList.size();
1537 idx = 0;
1538 for (const auto &[key, value] : annList) {
1539 os << key << "=" << value;
1540
1541 ++idx;
1542
1543 if (idx < size) {
1544 os << ", ";
1545 }
1546 }
1547
1548 os << ">";
1549 }
1550
SerializeLineNumberTable(const panda_file::LineNumberTable & lineNumberTable,std::ostream & os) const1551 void Disassembler::SerializeLineNumberTable(const panda_file::LineNumberTable &lineNumberTable, std::ostream &os) const
1552 {
1553 if (lineNumberTable.empty()) {
1554 return;
1555 }
1556
1557 os << "\n# LINE_NUMBER_TABLE:\n";
1558 for (const auto &lineInfo : lineNumberTable) {
1559 os << "#\tline " << lineInfo.line << ": " << lineInfo.offset << "\n";
1560 }
1561 }
1562
// Writes the local variable table of a method as comment lines under a
// "# LOCAL_VARIABLE_TABLE:" header. An empty table produces no output.
//
// Every row holds the start offset, the end offset, the register, the name and the type of a
// variable; the type signature is appended in parentheses when it is non-empty and differs
// from the type.
void Disassembler::SerializeLocalVariableTable(const panda_file::LocalVariableTable &localVariableTable,
                                               const pandasm::Function &method, std::ostream &os) const
{
    if (localVariableTable.empty()) {
        return;
    }

    os << "\n# LOCAL_VARIABLE_TABLE:\n";
    os << "#\t Start End Register Name Signature\n";
    // Column widths of the right-aligned table cells.
    const int startWidth = 5;
    const int endWidth = 4;
    const int regWidth = 8;
    const int nameWidth = 14;
    for (const auto &variableInfo : localVariableTable) {
        // The register cell is "<number>(<alias>)": "acc" for a negative register number,
        // "v<N>" for registers below method.GetTotalRegs(), and "a<N>" for the remaining ones,
        // numbered from zero.
        std::ostringstream regStream;
        regStream << variableInfo.regNumber << '(';
        if (variableInfo.regNumber < 0) {
            regStream << "acc";
        } else {
            uint32_t vreg = variableInfo.regNumber;
            uint32_t firstArgReg = method.GetTotalRegs();
            if (vreg < firstArgReg) {
                regStream << 'v' << vreg;
            } else {
                regStream << 'a' << vreg - firstArgReg;
            }
        }
        regStream << ')';

        os << "#\t " << std::setw(startWidth) << std::right << variableInfo.startOffset << " ";
        os << std::setw(endWidth) << std::right << variableInfo.endOffset << " ";
        os << std::setw(regWidth) << std::right << regStream.str() << " ";
        os << std::setw(nameWidth) << std::right << variableInfo.name << " " << variableInfo.type;
        // Show the signature only when it adds information beyond the type.
        if (!variableInfo.typeSignature.empty() && variableInfo.typeSignature != variableInfo.type) {
            os << " (" << variableInfo.typeSignature << ")";
        }
        os << "\n";
    }
}
1602
SerializeLanguage(std::ostream & os) const1603 void Disassembler::SerializeLanguage(std::ostream &os) const
1604 {
1605 os << ".language " << ark::panda_file::LanguageToString(fileLanguage_) << "\n\n";
1606 }
1607
SerializeFilename(std::ostream & os) const1608 void Disassembler::SerializeFilename(std::ostream &os) const
1609 {
1610 if (file_ == nullptr || file_->GetFilename().empty()) {
1611 return;
1612 }
1613
1614 os << "# source binary: " << file_->GetFilename() << "\n\n";
1615 }
1616
SerializeLitArrays(std::ostream & os,bool addSeparators) const1617 void Disassembler::SerializeLitArrays(std::ostream &os, bool addSeparators) const
1618 {
1619 LOG(DEBUG, DISASSEMBLER) << "[serializing literals]";
1620
1621 if (prog_.literalarrayTable.empty()) {
1622 return;
1623 }
1624
1625 if (addSeparators) {
1626 os << "# ====================\n"
1627 "# LITERALS\n\n";
1628 }
1629
1630 for (const auto &pair : prog_.literalarrayTable) {
1631 Serialize(pair.first, pair.second, os);
1632 }
1633
1634 os << "\n";
1635 }
1636
SerializeRecords(std::ostream & os,bool addSeparators,bool printInformation) const1637 void Disassembler::SerializeRecords(std::ostream &os, bool addSeparators, bool printInformation) const
1638 {
1639 LOG(DEBUG, DISASSEMBLER) << "[serializing records]";
1640
1641 if (prog_.recordTable.empty()) {
1642 return;
1643 }
1644
1645 if (addSeparators) {
1646 os << "# ====================\n"
1647 "# RECORDS\n\n";
1648 }
1649
1650 for (const auto &r : prog_.recordTable) {
1651 if (!panda_file::IsDummyClassName(r.first)) {
1652 Serialize(r.second, os, printInformation);
1653 } else {
1654 SerializeUnionFields(r.second, os, printInformation);
1655 }
1656 }
1657 }
1658
SerializeMethods(std::ostream & os,bool addSeparators,bool printInformation) const1659 void Disassembler::SerializeMethods(std::ostream &os, bool addSeparators, bool printInformation) const
1660 {
1661 LOG(DEBUG, DISASSEMBLER) << "[serializing methods]";
1662
1663 if (prog_.functionTable.empty()) {
1664 return;
1665 }
1666
1667 if (addSeparators) {
1668 os << "# ====================\n"
1669 "# METHODS\n\n";
1670 }
1671
1672 for (const auto &m : prog_.functionTable) {
1673 Serialize(m.second, os, printInformation);
1674 }
1675 }
1676
BytecodeOpcodeToPandasmOpcode(uint8_t o) const1677 pandasm::Opcode Disassembler::BytecodeOpcodeToPandasmOpcode(uint8_t o) const
1678 {
1679 return BytecodeOpcodeToPandasmOpcode(BytecodeInstruction::Opcode(o));
1680 }
1681
IDToString(BytecodeInstruction bcIns,panda_file::File::EntityId methodId) const1682 std::string Disassembler::IDToString(BytecodeInstruction bcIns, panda_file::File::EntityId methodId) const
1683 {
1684 std::stringstream name;
1685
1686 if (bcIns.HasFlag(BytecodeInstruction::Flags::TYPE_ID)) {
1687 auto idx = bcIns.GetId().AsIndex();
1688 auto id = file_->ResolveClassIndex(methodId, idx);
1689 auto type = pandasm::Type::FromDescriptor(StringDataToString(file_->GetStringData(id)));
1690
1691 name.str("");
1692 name << type.GetPandasmName();
1693 } else if (bcIns.HasFlag(BytecodeInstruction::Flags::METHOD_ID)) {
1694 auto idx = bcIns.GetId().AsIndex();
1695 auto id = file_->ResolveMethodIndex(methodId, idx);
1696
1697 name << GetMethodSignature(id);
1698 } else if (bcIns.HasFlag(BytecodeInstruction::Flags::STRING_ID)) {
1699 name << '\"';
1700
1701 if (skipStrings_ || quiet_) {
1702 name << std::hex << "0x" << bcIns.GetId().AsFileId();
1703 } else {
1704 name << StringDataToString(file_->GetStringData(bcIns.GetId().AsFileId()));
1705 }
1706
1707 name << '\"';
1708 } else if (bcIns.HasFlag(BytecodeInstruction::Flags::FIELD_ID)) {
1709 auto idx = bcIns.GetId().AsIndex();
1710 auto id = file_->ResolveFieldIndex(methodId, idx);
1711 panda_file::FieldDataAccessor fieldAccessor(*file_, id);
1712
1713 auto recordName = GetFullRecordName(fieldAccessor.GetClassId());
1714 if (!panda_file::IsDummyClassName(recordName)) {
1715 name << recordName;
1716 name << '.';
1717 }
1718 name << StringDataToString(file_->GetStringData(fieldAccessor.GetNameId()));
1719 } else if (bcIns.HasFlag(BytecodeInstruction::Flags::LITERALARRAY_ID)) {
1720 auto index = bcIns.GetId().AsIndex();
1721 name << "array_" << index;
1722 }
1723
1724 return name.str();
1725 }
1726
GetRecordLanguage(panda_file::File::EntityId classId) const1727 ark::panda_file::SourceLang Disassembler::GetRecordLanguage(panda_file::File::EntityId classId) const
1728 {
1729 if (file_->IsExternal(classId)) {
1730 return ark::panda_file::SourceLang::PANDA_ASSEMBLY;
1731 }
1732
1733 panda_file::ClassDataAccessor cda(*file_, classId);
1734 return cda.GetSourceLang().value_or(panda_file::SourceLang::PANDA_ASSEMBLY);
1735 }
1736
TranslateImmToLabel(pandasm::Ins * paIns,LabelTable * labelTable,const uint8_t * insArr,BytecodeInstruction bcIns,BytecodeInstruction bcInsLast,panda_file::File::EntityId codeId)1737 static void TranslateImmToLabel(pandasm::Ins *paIns, LabelTable *labelTable, const uint8_t *insArr,
1738 BytecodeInstruction bcIns, BytecodeInstruction bcInsLast,
1739 panda_file::File::EntityId codeId)
1740 {
1741 const int32_t jmpOffset = std::get<int64_t>(paIns->imms.at(0));
1742 const auto bcInsDest = bcIns.JumpTo(jmpOffset);
1743 if (bcInsLast.GetAddress() > bcInsDest.GetAddress()) {
1744 size_t idx = GetBytecodeInstructionNumber(BytecodeInstruction(insArr), bcInsDest);
1745 if (idx != std::numeric_limits<size_t>::max()) {
1746 if (labelTable->find(idx) == labelTable->end()) {
1747 std::stringstream ss {};
1748 ss << "jump_label_" << labelTable->size();
1749 (*labelTable)[idx] = ss.str();
1750 }
1751
1752 paIns->imms.clear();
1753 paIns->ids.push_back(labelTable->at(idx));
1754 } else {
1755 LOG(ERROR, DISASSEMBLER) << "> error encountered at " << codeId << " (0x" << std::hex << codeId
1756 << "). incorrect instruction at offset: 0x" << (bcIns.GetAddress() - insArr)
1757 << ": invalid jump offset 0x" << jmpOffset
1758 << " - jumping in the middle of another instruction!";
1759 }
1760 } else {
1761 LOG(ERROR, DISASSEMBLER) << "> error encountered at " << codeId << " (0x" << std::hex << codeId
1762 << "). incorrect instruction at offset: 0x" << (bcIns.GetAddress() - insArr)
1763 << ": invalid jump offset 0x" << jmpOffset << " - jumping out of bounds!";
1764 }
1765 }
1766
CollectExternalFields(const panda_file::FieldDataAccessor & fieldAccessor)1767 void Disassembler::CollectExternalFields(const panda_file::FieldDataAccessor &fieldAccessor)
1768 {
1769 auto recordName = GetFullRecordName(fieldAccessor.GetClassId());
1770
1771 pandasm::Field field(fileLanguage_);
1772 GetField(field, fieldAccessor);
1773 if (field.name.empty()) {
1774 return;
1775 }
1776
1777 auto &fieldList = externalFieldTable_[recordName];
1778 auto retField = std::find_if(fieldList.begin(), fieldList.end(),
1779 [&field](pandasm::Field &fieldFromList) { return field.name == fieldFromList.name; });
1780 if (retField == fieldList.end()) {
1781 fieldList.emplace_back(std::move(field));
1782
1783 externalFieldsInfoTable_[recordName].emplace_back(GetFieldInfo(fieldAccessor));
1784 }
1785 }
1786
GetInstructions(pandasm::Function * method,panda_file::File::EntityId methodId,panda_file::File::EntityId codeId)1787 IdList Disassembler::GetInstructions(pandasm::Function *method, panda_file::File::EntityId methodId,
1788 panda_file::File::EntityId codeId)
1789 {
1790 panda_file::CodeDataAccessor codeAccessor(*file_, codeId);
1791
1792 const auto insSz = codeAccessor.GetCodeSize();
1793 const auto insArr = codeAccessor.GetInstructions();
1794
1795 method->regsNum = codeAccessor.GetNumVregs();
1796
1797 auto bcIns = BytecodeInstruction(insArr);
1798 auto from = bcIns.GetAddress();
1799 const auto bcInsLast = bcIns.JumpTo(insSz);
1800
1801 LabelTable labelTable = GetExceptions(method, methodId, codeId);
1802
1803 IdList unknownExternalMethods {};
1804
1805 while (bcIns.GetAddress() != bcInsLast.GetAddress()) {
1806 if (bcIns.GetAddress() > bcInsLast.GetAddress()) {
1807 LOG(ERROR, DISASSEMBLER) << "> error encountered at " << codeId << " (0x" << std::hex << codeId
1808 << "). bytecode instructions sequence corrupted for method " << method->name
1809 << "! went out of bounds";
1810
1811 break;
1812 }
1813
1814 if (bcIns.HasFlag(BytecodeInstruction::Flags::FIELD_ID)) {
1815 auto idx = bcIns.GetId().AsIndex();
1816 auto id = file_->ResolveFieldIndex(methodId, idx);
1817 panda_file::FieldDataAccessor fieldAccessor(*file_, id);
1818
1819 if (fieldAccessor.IsExternal()) {
1820 CollectExternalFields(fieldAccessor);
1821 }
1822 }
1823
1824 auto paIns = BytecodeInstructionToPandasmInstruction(bcIns, methodId);
1825 paIns.insDebug.boundLeft =
1826 bcIns.GetAddress() - from; // It is used to produce a line table during method serialization
1827 if (paIns.IsJump()) {
1828 TranslateImmToLabel(&paIns, &labelTable, insArr, bcIns, bcInsLast, codeId);
1829 }
1830
1831 // check if method id is unknown external method. if so, emplace it in table
1832 if (bcIns.HasFlag(BytecodeInstruction::Flags::METHOD_ID)) {
1833 const auto argMethodIdx = bcIns.GetId().AsIndex();
1834 const auto argMethodId = file_->ResolveMethodIndex(methodId, argMethodIdx);
1835
1836 const auto argMethodSignature = GetMethodSignature(argMethodId);
1837
1838 const bool isPresent = prog_.functionTable.find(argMethodSignature) != prog_.functionTable.cend();
1839 const bool isExternal = file_->IsExternal(argMethodId);
1840 if (isExternal && !isPresent) {
1841 unknownExternalMethods.push_back(argMethodId);
1842 }
1843 }
1844
1845 method->ins.push_back(paIns);
1846 bcIns = bcIns.GetNext();
1847 }
1848
1849 for (const auto &pair : labelTable) {
1850 method->ins[pair.first].label = pair.second;
1851 method->ins[pair.first].setLabel = true;
1852 }
1853
1854 return unknownExternalMethods;
1855 }
1856
1857 } // namespace ark::disasm
1858