1 /*
2 * Copyright (c) 2021-2023 Huawei Device Co., Ltd.
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at
6 *
7 * http://www.apache.org/licenses/LICENSE-2.0
8 *
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
14 */
15
16 #include "disassembler.h"
17 #include "class_data_accessor.h"
18 #include "field_data_accessor.h"
19 #include "libpandafile/type_helper.h"
20 #include "mangling.h"
21 #include "utils/logger.h"
22
23 #include <iomanip>
24
25 #include "get_language_specific_metadata.inc"
26
27 namespace panda::disasm {
28
Disassemble(std::string_view filenameIn,const bool quiet,const bool skipStrings)29 void Disassembler::Disassemble(std::string_view filenameIn, const bool quiet, const bool skipStrings)
30 {
31 auto file = panda_file::File::Open(filenameIn);
32 if (file == nullptr) {
33 LOG(FATAL, DISASSEMBLER) << "> unable to open specified pandafile: <" << filenameIn << ">";
34 }
35
36 Disassemble(file, quiet, skipStrings);
37 }
38
Disassemble(const panda_file::File & file,const bool quiet,const bool skipStrings)39 void Disassembler::Disassemble(const panda_file::File &file, const bool quiet, const bool skipStrings)
40 {
41 SetFile(file);
42 DisassembleImpl(quiet, skipStrings);
43 }
44
Disassemble(std::unique_ptr<const panda_file::File> & file,const bool quiet,const bool skipStrings)45 void Disassembler::Disassemble(std::unique_ptr<const panda_file::File> &file, const bool quiet, const bool skipStrings)
46 {
47 SetFile(file);
48 DisassembleImpl(quiet, skipStrings);
49 }
50
DisassembleImpl(const bool quiet,const bool skipStrings)51 void Disassembler::DisassembleImpl(const bool quiet, const bool skipStrings)
52 {
53 prog_ = pandasm::Program {};
54
55 recordNameToId_.clear();
56 methodNameToId_.clear();
57
58 skipStrings_ = skipStrings;
59 quiet_ = quiet;
60
61 progInfo_ = ProgInfo {};
62
63 progAnn_ = ProgAnnotations {};
64
65 GetLiteralArrays();
66 GetRecords();
67
68 AddExternalFieldsToRecords();
69 GetLanguageSpecificMetadata();
70 }
71
SetFile(std::unique_ptr<const panda_file::File> & file)72 void Disassembler::SetFile(std::unique_ptr<const panda_file::File> &file)
73 {
74 fileHolder_.swap(file);
75 file_ = fileHolder_.get();
76 }
77
SetFile(const panda_file::File & file)78 void Disassembler::SetFile(const panda_file::File &file)
79 {
80 fileHolder_.reset();
81 file_ = &file;
82 }
83
SetProfile(std::string_view fname)84 void Disassembler::SetProfile(std::string_view fname)
85 {
86 std::ifstream stm(fname.data(), std::ios::binary);
87 if (!stm.is_open()) {
88 LOG(FATAL, DISASSEMBLER) << "Cannot open profile file";
89 }
90
91 auto res = profiling::ReadProfile(stm, fileLanguage_);
92 if (!res) {
93 LOG(FATAL, DISASSEMBLER) << "Failed to deserialize: " << res.Error();
94 }
95 profile_ = res.Value();
96 }
97
GetInsInfo(panda_file::MethodDataAccessor & mda,const panda_file::File::EntityId & codeId,MethodInfo * methodInfo) const98 void Disassembler::GetInsInfo(panda_file::MethodDataAccessor &mda, const panda_file::File::EntityId &codeId,
99 MethodInfo *methodInfo /* out */) const
100 {
101 const static size_t FORMAT_WIDTH = 20;
102 const static size_t INSTRUCTION_WIDTH = 2;
103
104 panda_file::CodeDataAccessor codeAccessor(*file_, codeId);
105
106 std::string methodName = mda.GetFullName();
107 auto prof = profiling::INVALID_PROFILE;
108 if (profile_ != profiling::INVALID_PROFILE) {
109 prof = profiling::FindMethodInProfile(profile_, fileLanguage_, methodName);
110 }
111
112 auto insSz = codeAccessor.GetCodeSize();
113 auto insArr = codeAccessor.GetInstructions();
114
115 auto bcIns = BytecodeInstruction(insArr);
116 auto bcInsLast = bcIns.JumpTo(insSz);
117
118 while (bcIns.GetAddress() != bcInsLast.GetAddress()) {
119 std::stringstream ss;
120
121 uintptr_t bc = bcIns.GetAddress() - BytecodeInstruction(insArr).GetAddress();
122 ss << "offset: 0x" << std::setfill('0') << std::setw(4U) << std::hex << bc;
123 ss << ", " << std::setfill('.');
124
125 BytecodeInstruction::Format format = bcIns.GetFormat();
126
127 auto formatStr = std::string("[") + BytecodeInstruction::GetFormatString(format) + ']';
128 ss << std::setw(FORMAT_WIDTH) << std::left << formatStr;
129
130 ss << "[";
131
132 const uint8_t *pc = bcIns.GetAddress();
133 const size_t sz = bcIns.GetSize();
134
135 for (size_t i = 0; i < sz; i++) {
136 ss << "0x" << std::setw(INSTRUCTION_WIDTH) << std::setfill('0') << std::right << std::hex
137 << static_cast<int>(pc[i]); // NOLINT(cppcoreguidelines-pro-bounds-pointer-arithmetic)
138
139 if (i != sz - 1) {
140 ss << " ";
141 }
142 }
143
144 ss << "]";
145
146 if (profile_ != profiling::INVALID_PROFILE && prof != profiling::INVALID_PROFILE) {
147 auto profId = bcIns.GetProfileId();
148 if (profId != -1) {
149 ss << ", Profile: ";
150 profiling::DumpProfile(prof, fileLanguage_, &bcIns, ss);
151 }
152 }
153
154 methodInfo->instructionsInfo.push_back(ss.str());
155
156 bcIns = bcIns.GetNext();
157 }
158 }
159
CollectInfo()160 void Disassembler::CollectInfo()
161 {
162 LOG(DEBUG, DISASSEMBLER) << "\n[getting program info]\n";
163
164 debugInfoExtractor_ = std::make_unique<panda_file::DebugInfoExtractor>(file_);
165
166 for (const auto &pair : recordNameToId_) {
167 GetRecordInfo(pair.second, &progInfo_.recordsInfo[pair.first]);
168 }
169
170 for (const auto &pair : methodNameToId_) {
171 GetMethodInfo(pair.second, &progInfo_.methodsInfo[pair.first]);
172 }
173 }
174
Serialize(std::ostream & os,bool addSeparators,bool printInformation) const175 void Disassembler::Serialize(std::ostream &os, bool addSeparators, bool printInformation) const
176 {
177 if (os.bad()) {
178 LOG(DEBUG, DISASSEMBLER) << "> serialization failed. os bad\n";
179
180 return;
181 }
182
183 SerializeFilename(os);
184 SerializeLanguage(os);
185 SerializeLitArrays(os, addSeparators);
186 SerializeRecords(os, addSeparators, printInformation);
187 SerializeMethods(os, addSeparators, printInformation);
188 }
189
Serialize(const pandasm::Function & method,std::ostream & os,bool printInformation,panda_file::LineNumberTable * lineTable) const190 void Disassembler::Serialize(const pandasm::Function &method, std::ostream &os, bool printInformation,
191 panda_file::LineNumberTable *lineTable) const
192 {
193 std::ostringstream headerSs;
194 headerSs << ".function " << method.returnType.GetPandasmName() << " " << method.name << "(";
195
196 if (!method.params.empty()) {
197 headerSs << method.params[0].type.GetPandasmName() << " a0";
198
199 for (size_t i = 1; i < method.params.size(); i++) {
200 headerSs << ", " << method.params[i].type.GetPandasmName() << " a" << (size_t)i;
201 }
202 }
203 headerSs << ")";
204
205 const std::string signature = pandasm::GetFunctionSignatureFromName(method.name, method.params);
206
207 const auto methodIter = progAnn_.methodAnnotations.find(signature);
208 if (methodIter != progAnn_.methodAnnotations.end()) {
209 Serialize(*method.metadata, methodIter->second, headerSs);
210 } else {
211 Serialize(*method.metadata, {}, headerSs);
212 }
213
214 if (!method.HasImplementation()) {
215 headerSs << "\n\n";
216 os << headerSs.str();
217 return;
218 }
219
220 headerSs << " {";
221
222 size_t width;
223 const MethodInfo *methodInfo;
224 auto methodInfoIt = progInfo_.methodsInfo.find(signature);
225 bool printMethodInfo = printInformation && methodInfoIt != progInfo_.methodsInfo.end();
226 if (printMethodInfo) {
227 methodInfo = &methodInfoIt->second;
228
229 width = 0;
230 for (const auto &i : method.ins) {
231 if (i.ToString().size() > width) {
232 width = i.ToString().size();
233 }
234 }
235
236 headerSs << " # " << methodInfo->methodInfo << "\n# CODE:";
237 }
238
239 headerSs << "\n";
240
241 auto headerSsStr = headerSs.str();
242 size_t lineNumber = std::count(headerSsStr.begin(), headerSsStr.end(), '\n') + 1;
243
244 os << headerSsStr;
245
246 for (size_t i = 0; i < method.ins.size(); i++) {
247 std::ostringstream insSs;
248
249 std::string ins = method.ins[i].ToString("", method.GetParamsNum() != 0, method.regsNum);
250 if (method.ins[i].setLabel) {
251 std::string delim = ": ";
252 size_t pos = ins.find(delim);
253 std::string label = ins.substr(0, pos);
254 ins.erase(0, pos + delim.length());
255
256 insSs << label << ":\n";
257 }
258
259 insSs << "\t";
260 if (printMethodInfo) {
261 insSs << std::setw(width) << std::left;
262 }
263 insSs << ins;
264 if (printMethodInfo) {
265 ASSERT(methodInfo != nullptr);
266 insSs << " # " << methodInfo->instructionsInfo[i];
267 }
268 insSs << "\n";
269
270 auto insSsStr = insSs.str();
271 lineNumber += std::count(insSsStr.begin(), insSsStr.end(), '\n');
272
273 if (lineTable != nullptr) {
274 lineTable->emplace_back(
275 panda_file::LineTableEntry {static_cast<uint32_t>(method.ins[i].insDebug.boundLeft), lineNumber - 1});
276 }
277
278 os << insSsStr;
279 }
280
281 if (!method.catchBlocks.empty()) {
282 os << "\n";
283
284 for (const auto &catchBlock : method.catchBlocks) {
285 Serialize(catchBlock, os);
286
287 os << "\n";
288 }
289 }
290
291 if (printMethodInfo) {
292 ASSERT(methodInfo != nullptr);
293 SerializeLineNumberTable(methodInfo->lineNumberTable, os);
294 SerializeLocalVariableTable(methodInfo->localVariableTable, method, os);
295 }
296
297 os << "}\n\n";
298 }
299
IsSystemType(const std::string & typeName)300 inline bool Disassembler::IsSystemType(const std::string &typeName)
301 {
302 bool isArrayType = typeName.back() == ']';
303 bool isGlobal = typeName == "_GLOBAL";
304
305 return isArrayType || isGlobal;
306 }
307
GetRecord(pandasm::Record * record,const panda_file::File::EntityId & recordId)308 void Disassembler::GetRecord(pandasm::Record *record, const panda_file::File::EntityId &recordId)
309 {
310 LOG(DEBUG, DISASSEMBLER) << "\n[getting record]\nid: " << recordId << " (0x" << std::hex << recordId << ")";
311
312 if (record == nullptr) {
313 LOG(ERROR, DISASSEMBLER) << "> nullptr recieved, but record ptr expected!";
314
315 return;
316 }
317
318 record->name = GetFullRecordName(recordId);
319
320 LOG(DEBUG, DISASSEMBLER) << "name: " << record->name;
321
322 GetMetaData(record, recordId);
323
324 if (!file_->IsExternal(recordId)) {
325 GetMethods(recordId);
326 GetFields(record, recordId);
327 }
328 }
329
AddMethodToTables(const panda_file::File::EntityId & methodId)330 void Disassembler::AddMethodToTables(const panda_file::File::EntityId &methodId)
331 {
332 pandasm::Function newMethod("", fileLanguage_);
333 GetMethod(&newMethod, methodId);
334
335 const auto signature = pandasm::GetFunctionSignatureFromName(newMethod.name, newMethod.params);
336 if (prog_.functionTable.find(signature) != prog_.functionTable.end()) {
337 return;
338 }
339
340 methodNameToId_.emplace(signature, methodId);
341 prog_.functionSynonyms[newMethod.name].push_back(signature);
342 prog_.functionTable.emplace(signature, std::move(newMethod));
343 }
344
GetMethod(pandasm::Function * method,const panda_file::File::EntityId & methodId)345 void Disassembler::GetMethod(pandasm::Function *method, const panda_file::File::EntityId &methodId)
346 {
347 LOG(DEBUG, DISASSEMBLER) << "\n[getting method]\nid: " << methodId << " (0x" << std::hex << methodId << ")";
348
349 if (method == nullptr) {
350 LOG(ERROR, DISASSEMBLER) << "> nullptr recieved, but method ptr expected!";
351
352 return;
353 }
354
355 panda_file::MethodDataAccessor methodAccessor(*file_, methodId);
356
357 method->name = GetFullMethodName(methodId);
358
359 LOG(DEBUG, DISASSEMBLER) << "name: " << method->name;
360
361 GetParams(method, methodAccessor.GetProtoId());
362 GetMetaData(method, methodId);
363
364 if (!method->HasImplementation()) {
365 return;
366 }
367
368 if (methodAccessor.GetCodeId().has_value()) {
369 const IdList idList = GetInstructions(method, methodId, methodAccessor.GetCodeId().value());
370
371 for (const auto &id : idList) {
372 AddMethodToTables(id);
373 }
374 } else {
375 LOG(ERROR, DISASSEMBLER) << "> error encountered at " << methodId << " (0x" << std::hex << methodId
376 << "). implementation of method expected, but no \'CODE\' tag was found!";
377
378 return;
379 }
380 }
381
382 template <typename T>
FillLiteralArrayData(pandasm::LiteralArray * litArray,const panda_file::LiteralTag & tag,const panda_file::LiteralDataAccessor::LiteralValue & value) const383 void Disassembler::FillLiteralArrayData(pandasm::LiteralArray *litArray, const panda_file::LiteralTag &tag,
384 const panda_file::LiteralDataAccessor::LiteralValue &value) const
385 {
386 panda_file::File::EntityId id(std::get<uint32_t>(value));
387 auto sp = file_->GetSpanFromId(id);
388 auto len = panda_file::helpers::Read<sizeof(uint32_t)>(&sp);
389 if (tag != panda_file::LiteralTag::ARRAY_STRING) {
390 for (size_t i = 0; i < len; i++) {
391 pandasm::LiteralArray::Literal lit;
392 lit.tag = tag;
393 lit.value = bit_cast<T>(panda_file::helpers::Read<sizeof(T)>(&sp));
394 litArray->literals.push_back(lit);
395 }
396 } else {
397 for (size_t i = 0; i < len; i++) {
398 auto strId = panda_file::helpers::Read<sizeof(T)>(&sp);
399 pandasm::LiteralArray::Literal lit;
400 lit.tag = tag;
401 lit.value = StringDataToString(file_->GetStringData(panda_file::File::EntityId(strId)));
402 litArray->literals.push_back(lit);
403 }
404 }
405 }
406
FillLiteralData(pandasm::LiteralArray * litArray,const panda_file::LiteralDataAccessor::LiteralValue & value,const panda_file::LiteralTag & tag) const407 void Disassembler::FillLiteralData(pandasm::LiteralArray *litArray,
408 const panda_file::LiteralDataAccessor::LiteralValue &value,
409 const panda_file::LiteralTag &tag) const
410 {
411 pandasm::LiteralArray::Literal lit;
412 lit.tag = tag;
413 switch (tag) {
414 case panda_file::LiteralTag::BOOL: {
415 lit.value = std::get<bool>(value);
416 break;
417 }
418 case panda_file::LiteralTag::ACCESSOR:
419 case panda_file::LiteralTag::NULLVALUE: {
420 lit.value = std::get<uint8_t>(value);
421 break;
422 }
423 case panda_file::LiteralTag::METHODAFFILIATE: {
424 lit.value = std::get<uint16_t>(value);
425 break;
426 }
427 case panda_file::LiteralTag::INTEGER: {
428 lit.value = std::get<uint32_t>(value);
429 break;
430 }
431 case panda_file::LiteralTag::BIGINT: {
432 lit.value = std::get<uint64_t>(value);
433 break;
434 }
435 case panda_file::LiteralTag::FLOAT: {
436 lit.value = std::get<float>(value);
437 break;
438 }
439 case panda_file::LiteralTag::DOUBLE: {
440 lit.value = std::get<double>(value);
441 break;
442 }
443 case panda_file::LiteralTag::STRING:
444 case panda_file::LiteralTag::METHOD:
445 case panda_file::LiteralTag::GENERATORMETHOD: {
446 auto strData = file_->GetStringData(panda_file::File::EntityId(std::get<uint32_t>(value)));
447 lit.value = StringDataToString(strData);
448 break;
449 }
450 case panda_file::LiteralTag::TAGVALUE: {
451 return;
452 }
453 default: {
454 LOG(ERROR, DISASSEMBLER) << "Unsupported literal with tag 0x" << std::hex << static_cast<uint32_t>(tag);
455 UNREACHABLE();
456 }
457 }
458 litArray->literals.push_back(lit);
459 }
460
GetLiteralArray(pandasm::LiteralArray * litArray,const size_t index)461 void Disassembler::GetLiteralArray(pandasm::LiteralArray *litArray, const size_t index)
462 {
463 LOG(DEBUG, DISASSEMBLER) << "\n[getting literal array]\nindex: " << index;
464
465 panda_file::LiteralDataAccessor litArrayAccessor(*file_, file_->GetLiteralArraysId());
466
467 // clang-format off
468 litArrayAccessor.EnumerateLiteralVals(index,
469 [this, litArray](const panda_file::LiteralDataAccessor::LiteralValue &value,
470 const panda_file::LiteralTag &tag) {
471 switch (tag) {
472 case panda_file::LiteralTag::ARRAY_U1: {
473 FillLiteralArrayData<bool>(litArray, tag, value);
474 break;
475 }
476 case panda_file::LiteralTag::ARRAY_I8:
477 case panda_file::LiteralTag::ARRAY_U8: {
478 FillLiteralArrayData<uint8_t>(litArray, tag, value);
479 break;
480 }
481 case panda_file::LiteralTag::ARRAY_I16:
482 case panda_file::LiteralTag::ARRAY_U16: {
483 FillLiteralArrayData<uint16_t>(litArray, tag, value);
484 break;
485 }
486 case panda_file::LiteralTag::ARRAY_I32:
487 case panda_file::LiteralTag::ARRAY_U32: {
488 FillLiteralArrayData<uint32_t>(litArray, tag, value);
489 break;
490 }
491 case panda_file::LiteralTag::ARRAY_I64:
492 case panda_file::LiteralTag::ARRAY_U64: {
493 FillLiteralArrayData<uint64_t>(litArray, tag, value);
494 break;
495 }
496 case panda_file::LiteralTag::ARRAY_F32: {
497 FillLiteralArrayData<float>(litArray, tag, value);
498 break;
499 }
500 case panda_file::LiteralTag::ARRAY_F64: {
501 FillLiteralArrayData<double>(litArray, tag, value);
502 break;
503 }
504 case panda_file::LiteralTag::ARRAY_STRING: {
505 FillLiteralArrayData<uint32_t>(litArray, tag, value);
506 break;
507 }
508 default: {
509 FillLiteralData(litArray, value, tag);
510 }
511 }
512 });
513 // clang-format on
514 }
515
GetLiteralArrays()516 void Disassembler::GetLiteralArrays()
517 {
518 const auto litArraysId = file_->GetLiteralArraysId();
519
520 LOG(DEBUG, DISASSEMBLER) << "\n[getting literal arrays]\nid: " << litArraysId << " (0x" << std::hex << litArraysId
521 << ")";
522
523 panda_file::LiteralDataAccessor litArrayAccessor(*file_, litArraysId);
524 size_t numLitarrays = litArrayAccessor.GetLiteralNum();
525 for (size_t index = 0; index < numLitarrays; index++) {
526 panda::pandasm::LiteralArray litAr;
527 GetLiteralArray(&litAr, index);
528 prog_.literalarrayTable.emplace(std::to_string(index), litAr);
529 }
530 }
531
GetRecords()532 void Disassembler::GetRecords()
533 {
534 LOG(DEBUG, DISASSEMBLER) << "\n[getting records]\n";
535
536 const auto classIdx = file_->GetClasses();
537
538 for (size_t i = 0; i < classIdx.size(); i++) {
539 uint32_t classId = classIdx[i];
540 auto classOff = file_->GetHeader()->classIdxOff + sizeof(uint32_t) * i;
541
542 if (classId > file_->GetHeader()->fileSize) {
543 LOG(ERROR, DISASSEMBLER) << "> error encountered in record at " << classOff << " (0x" << std::hex
544 << classOff << "). binary file corrupted. record offset (0x" << classId
545 << ") out of bounds (0x" << file_->GetHeader()->fileSize << ")!";
546 break;
547 }
548
549 const panda_file::File::EntityId recordId {classId};
550 auto language = GetRecordLanguage(recordId);
551 if (language != fileLanguage_) {
552 if (fileLanguage_ == panda_file::SourceLang::PANDA_ASSEMBLY) {
553 fileLanguage_ = language;
554 } else if (language != panda_file::SourceLang::PANDA_ASSEMBLY) {
555 LOG(ERROR, DISASSEMBLER) << "> possible error encountered in record at" << classOff << " (0x"
556 << std::hex << classOff << "). record's language ("
557 << panda_file::LanguageToString(language)
558 << ") differs from file's language ("
559 << panda_file::LanguageToString(fileLanguage_) << ")!";
560 }
561 }
562
563 pandasm::Record record("", fileLanguage_);
564 GetRecord(&record, recordId);
565
566 if (prog_.recordTable.find(record.name) == prog_.recordTable.end()) {
567 recordNameToId_.emplace(record.name, recordId);
568 prog_.recordTable.emplace(record.name, std::move(record));
569 }
570 }
571 }
572
GetField(pandasm::Field & field,const panda_file::FieldDataAccessor & fieldAccessor)573 void Disassembler::GetField(pandasm::Field &field, const panda_file::FieldDataAccessor &fieldAccessor)
574 {
575 panda_file::File::EntityId fieldNameId = fieldAccessor.GetNameId();
576 field.name = StringDataToString(file_->GetStringData(fieldNameId));
577
578 uint32_t fieldType = fieldAccessor.GetType();
579 field.type = FieldTypeToPandasmType(fieldType);
580
581 GetMetaData(&field, fieldAccessor.GetFieldId());
582 }
583
GetFields(pandasm::Record * record,const panda_file::File::EntityId & recordId)584 void Disassembler::GetFields(pandasm::Record *record, const panda_file::File::EntityId &recordId)
585 {
586 panda_file::ClassDataAccessor classAccessor {*file_, recordId};
587
588 classAccessor.EnumerateFields([&](panda_file::FieldDataAccessor &fieldAccessor) -> void {
589 pandasm::Field field(fileLanguage_);
590
591 GetField(field, fieldAccessor);
592
593 record->fieldList.push_back(std::move(field));
594 });
595 }
596
AddExternalFieldsToRecords()597 void Disassembler::AddExternalFieldsToRecords()
598 {
599 for (auto &[record_name, record] : prog_.recordTable) {
600 auto &[unused, field_list] = *(externalFieldTable_.find(record_name));
601 (void)unused;
602 if (field_list.empty()) {
603 continue;
604 }
605 for (auto &fieldIter : field_list) {
606 if (!fieldIter.name.empty()) {
607 record.fieldList.push_back(std::move(fieldIter));
608 }
609 }
610 externalFieldTable_.erase(record_name);
611 }
612 }
613
GetMethods(const panda_file::File::EntityId & recordId)614 void Disassembler::GetMethods(const panda_file::File::EntityId &recordId)
615 {
616 panda_file::ClassDataAccessor classAccessor {*file_, recordId};
617
618 classAccessor.EnumerateMethods([&](panda_file::MethodDataAccessor &methodAccessor) -> void {
619 AddMethodToTables(methodAccessor.GetMethodId());
620 });
621 }
622
GetParams(pandasm::Function * method,const panda_file::File::EntityId & protoId) const623 void Disassembler::GetParams(pandasm::Function *method, const panda_file::File::EntityId &protoId) const
624 {
625 /// frame size - 2^16 - 1
626 static const uint32_t MAX_ARG_NUM = 0xFFFF;
627
628 LOG(DEBUG, DISASSEMBLER) << "[getting params]\nproto id: " << protoId << " (0x" << std::hex << protoId << ")";
629
630 if (method == nullptr) {
631 LOG(ERROR, DISASSEMBLER) << "> nullptr recieved, but method ptr expected!";
632
633 return;
634 }
635
636 panda_file::ProtoDataAccessor protoAccessor(*file_, protoId);
637
638 if (protoAccessor.GetNumArgs() > MAX_ARG_NUM) {
639 LOG(ERROR, DISASSEMBLER) << "> error encountered at " << protoId << " (0x" << std::hex << protoId
640 << "). number of function's arguments (" << std::dec << protoAccessor.GetNumArgs()
641 << ") exceeds MAX_ARG_NUM (" << MAX_ARG_NUM << ") !";
642
643 return;
644 }
645
646 size_t refIdx = 0;
647 method->returnType = PFTypeToPandasmType(protoAccessor.GetReturnType(), protoAccessor, refIdx);
648
649 for (size_t i = 0; i < protoAccessor.GetNumArgs(); i++) {
650 auto argType = PFTypeToPandasmType(protoAccessor.GetArgType(i), protoAccessor, refIdx);
651 method->params.emplace_back(argType, fileLanguage_);
652 }
653 }
654
GetExceptions(pandasm::Function * method,panda_file::File::EntityId methodId,panda_file::File::EntityId codeId) const655 LabelTable Disassembler::GetExceptions(pandasm::Function *method, panda_file::File::EntityId methodId,
656 panda_file::File::EntityId codeId) const
657 {
658 LOG(DEBUG, DISASSEMBLER) << "[getting exceptions]\ncode id: " << codeId << " (0x" << std::hex << codeId << ")";
659
660 if (method == nullptr) {
661 LOG(ERROR, DISASSEMBLER) << "> nullptr recieved, but method ptr expected!\n";
662 return LabelTable {};
663 }
664
665 panda_file::CodeDataAccessor codeAccessor(*file_, codeId);
666
667 const auto bcIns = BytecodeInstruction(codeAccessor.GetInstructions());
668 const auto bcInsLast = bcIns.JumpTo(codeAccessor.GetCodeSize());
669
670 size_t tryIdx = 0;
671 LabelTable labelTable {};
672 codeAccessor.EnumerateTryBlocks([&](panda_file::CodeDataAccessor::TryBlock &tryBlock) {
673 pandasm::Function::CatchBlock catchBlockPa {};
674 if (!LocateTryBlock(bcIns, bcInsLast, tryBlock, &catchBlockPa, &labelTable, tryIdx)) {
675 return false;
676 }
677 size_t catchIdx = 0;
678 tryBlock.EnumerateCatchBlocks([&](panda_file::CodeDataAccessor::CatchBlock &catchBlock) {
679 auto classIdx = catchBlock.GetTypeIdx();
680 if (classIdx == panda_file::INVALID_INDEX) {
681 catchBlockPa.exceptionRecord = "";
682 } else {
683 const auto classId = file_->ResolveClassIndex(methodId, classIdx);
684 catchBlockPa.exceptionRecord = GetFullRecordName(classId);
685 }
686 if (!LocateCatchBlock(bcIns, bcInsLast, catchBlock, &catchBlockPa, &labelTable, tryIdx, catchIdx)) {
687 return false;
688 }
689
690 method->catchBlocks.push_back(catchBlockPa);
691 catchBlockPa.catchBeginLabel = "";
692 catchBlockPa.catchEndLabel = "";
693 catchIdx++;
694
695 return true;
696 });
697 tryIdx++;
698
699 return true;
700 });
701
702 return labelTable;
703 }
704
GetBytecodeInstructionNumber(BytecodeInstruction bcInsFirst,BytecodeInstruction bcInsCur)705 static size_t GetBytecodeInstructionNumber(BytecodeInstruction bcInsFirst, BytecodeInstruction bcInsCur)
706 {
707 size_t count = 0;
708
709 while (bcInsFirst.GetAddress() != bcInsCur.GetAddress()) {
710 count++;
711 bcInsFirst = bcInsFirst.GetNext();
712 if (bcInsFirst.GetAddress() > bcInsCur.GetAddress()) {
713 return std::numeric_limits<size_t>::max();
714 }
715 }
716
717 return count;
718 }
719
LocateTryBlock(const BytecodeInstruction & bcIns,const BytecodeInstruction & bcInsLast,const panda_file::CodeDataAccessor::TryBlock & tryBlock,pandasm::Function::CatchBlock * catchBlockPa,LabelTable * labelTable,size_t tryIdx) const720 bool Disassembler::LocateTryBlock(const BytecodeInstruction &bcIns, const BytecodeInstruction &bcInsLast,
721 const panda_file::CodeDataAccessor::TryBlock &tryBlock,
722 pandasm::Function::CatchBlock *catchBlockPa, LabelTable *labelTable,
723 size_t tryIdx) const
724 {
725 const auto tryBeginBcIns = bcIns.JumpTo(tryBlock.GetStartPc());
726 const auto tryEndBcIns = bcIns.JumpTo(tryBlock.GetStartPc() + tryBlock.GetLength());
727
728 const size_t tryBeginIdx = GetBytecodeInstructionNumber(bcIns, tryBeginBcIns);
729 const size_t tryEndIdx = GetBytecodeInstructionNumber(bcIns, tryEndBcIns);
730
731 const bool tryBeginOffsetInRange = bcInsLast.GetAddress() > tryBeginBcIns.GetAddress();
732 const bool tryEndOffsetInRange = bcInsLast.GetAddress() >= tryEndBcIns.GetAddress();
733 const bool tryBeginOffsetValid = tryBeginIdx != std::numeric_limits<size_t>::max();
734 const bool tryEndOffsetValid = tryEndIdx != std::numeric_limits<size_t>::max();
735
736 if (!tryBeginOffsetInRange || !tryBeginOffsetValid) {
737 LOG(ERROR, DISASSEMBLER) << "> invalid try block begin offset! address is: 0x" << std::hex
738 << tryBeginBcIns.GetAddress();
739 return false;
740 }
741
742 auto itBegin = labelTable->find(tryBeginIdx);
743 if (itBegin == labelTable->end()) {
744 std::stringstream ss {};
745 ss << "try_begin_label_" << tryIdx;
746 catchBlockPa->tryBeginLabel = ss.str();
747 labelTable->insert(std::pair<size_t, std::string>(tryBeginIdx, ss.str()));
748 } else {
749 catchBlockPa->tryBeginLabel = itBegin->second;
750 }
751
752 if (!tryEndOffsetInRange || !tryEndOffsetValid) {
753 LOG(ERROR, DISASSEMBLER) << "> invalid try block end offset! address is: 0x" << std::hex
754 << tryEndBcIns.GetAddress();
755 return false;
756 }
757
758 auto itEnd = labelTable->find(tryEndIdx);
759 if (itEnd == labelTable->end()) {
760 std::stringstream ss {};
761 ss << "try_end_label_" << tryIdx;
762 catchBlockPa->tryEndLabel = ss.str();
763 labelTable->insert(std::pair<size_t, std::string>(tryEndIdx, ss.str()));
764 } else {
765 catchBlockPa->tryEndLabel = itEnd->second;
766 }
767
768 return true;
769 }
770
LocateCatchBlock(const BytecodeInstruction & bcIns,const BytecodeInstruction & bcInsLast,const panda_file::CodeDataAccessor::CatchBlock & catchBlock,pandasm::Function::CatchBlock * catchBlockPa,LabelTable * labelTable,size_t tryIdx,size_t catchIdx) const771 bool Disassembler::LocateCatchBlock(const BytecodeInstruction &bcIns, const BytecodeInstruction &bcInsLast,
772 const panda_file::CodeDataAccessor::CatchBlock &catchBlock,
773 pandasm::Function::CatchBlock *catchBlockPa, LabelTable *labelTable, size_t tryIdx,
774 size_t catchIdx) const
775 {
776 const auto handlerBeginOffset = catchBlock.GetHandlerPc();
777 const auto handlerEndOffset = handlerBeginOffset + catchBlock.GetCodeSize();
778
779 const auto handlerBeginBcIns = bcIns.JumpTo(handlerBeginOffset);
780 const auto handlerEndBcIns = bcIns.JumpTo(handlerEndOffset);
781
782 const size_t handlerBeginIdx = GetBytecodeInstructionNumber(bcIns, handlerBeginBcIns);
783 const size_t handlerEndIdx = GetBytecodeInstructionNumber(bcIns, handlerEndBcIns);
784
785 const bool handlerBeginOffsetInRange = bcInsLast.GetAddress() > handlerBeginBcIns.GetAddress();
786 const bool handlerEndOffsetInRange = bcInsLast.GetAddress() > handlerEndBcIns.GetAddress();
787 const bool handlerEndPresent = catchBlock.GetCodeSize() != 0;
788 const bool handlerBeginOffsetValid = handlerBeginIdx != std::numeric_limits<size_t>::max();
789 const bool handlerEndOffsetValid = handlerEndIdx != std::numeric_limits<size_t>::max();
790
791 if (!handlerBeginOffsetInRange || !handlerBeginOffsetValid) {
792 LOG(ERROR, DISASSEMBLER) << "> invalid catch block begin offset! address is: 0x" << std::hex
793 << handlerBeginBcIns.GetAddress();
794 return false;
795 }
796
797 auto itBegin = labelTable->find(handlerBeginIdx);
798 if (itBegin == labelTable->end()) {
799 std::stringstream ss {};
800 ss << "handler_begin_label_" << tryIdx << "_" << catchIdx;
801 catchBlockPa->catchBeginLabel = ss.str();
802 labelTable->insert(std::pair<size_t, std::string>(handlerBeginIdx, ss.str()));
803 } else {
804 catchBlockPa->catchBeginLabel = itBegin->second;
805 }
806
807 if (!handlerEndOffsetInRange || !handlerEndOffsetValid) {
808 LOG(ERROR, DISASSEMBLER) << "> invalid catch block end offset! address is: 0x" << std::hex
809 << handlerEndBcIns.GetAddress();
810 return false;
811 }
812
813 if (handlerEndPresent) {
814 auto itEnd = labelTable->find(handlerEndIdx);
815 if (itEnd == labelTable->end()) {
816 std::stringstream ss {};
817 ss << "handler_end_label_" << tryIdx << "_" << catchIdx;
818 catchBlockPa->catchEndLabel = ss.str();
819 labelTable->insert(std::pair<size_t, std::string>(handlerEndIdx, ss.str()));
820 } else {
821 catchBlockPa->catchEndLabel = itEnd->second;
822 }
823 }
824
825 return true;
826 }
827
828 template <typename T>
SetEntityAttribute(T * entity,const std::function<bool ()> & shouldSet,std::string_view attribute)829 static void SetEntityAttribute(T *entity, const std::function<bool()> &shouldSet, std::string_view attribute)
830 {
831 if (shouldSet()) {
832 auto err = entity->metadata->SetAttribute(attribute);
833 if (err.has_value()) {
834 LOG(ERROR, DISASSEMBLER) << err.value().GetMessage();
835 }
836 }
837 }
838
839 template <typename T>
SetEntityAttributeValue(T * entity,const std::function<bool ()> & shouldSet,std::string_view attribute,const char * value)840 static void SetEntityAttributeValue(T *entity, const std::function<bool()> &shouldSet, std::string_view attribute,
841 const char *value)
842 {
843 if (shouldSet()) {
844 auto err = entity->metadata->SetAttributeValue(attribute, value);
845 if (err.has_value()) {
846 LOG(ERROR, DISASSEMBLER) << err.value().GetMessage();
847 }
848 }
849 }
850
GetMetaData(pandasm::Function * method,const panda_file::File::EntityId & methodId) const851 void Disassembler::GetMetaData(pandasm::Function *method, const panda_file::File::EntityId &methodId) const
852 {
853 LOG(DEBUG, DISASSEMBLER) << "[getting metadata]\nmethod id: " << methodId << " (0x" << std::hex << methodId << ")";
854
855 if (method == nullptr) {
856 LOG(ERROR, DISASSEMBLER) << "> nullptr recieved, but method ptr expected!";
857
858 return;
859 }
860
861 panda_file::MethodDataAccessor methodAccessor(*file_, methodId);
862
863 const auto methodNameRaw = StringDataToString(file_->GetStringData(methodAccessor.GetNameId()));
864
865 if (!methodAccessor.IsStatic()) {
866 const auto className = StringDataToString(file_->GetStringData(methodAccessor.GetClassId()));
867 auto thisType = pandasm::Type::FromDescriptor(className);
868
869 LOG(DEBUG, DISASSEMBLER) << "method (raw: \'" << methodNameRaw
870 << "\') is not static. emplacing self-argument of type " << thisType.GetName();
871
872 method->params.insert(method->params.begin(), pandasm::Function::Parameter(thisType, fileLanguage_));
873 }
874 SetEntityAttribute(
875 method, [&methodAccessor]() { return methodAccessor.IsStatic(); }, "static");
876
877 SetEntityAttribute(
878 method, [this, &methodAccessor]() { return file_->IsExternal(methodAccessor.GetMethodId()); }, "external");
879
880 SetEntityAttribute(
881 method, [&methodAccessor]() { return methodAccessor.IsNative(); }, "native");
882
883 SetEntityAttribute(
884 method, [&methodAccessor]() { return methodAccessor.IsAbstract(); }, "noimpl");
885
886 SetEntityAttributeValue(
887 method, [&methodAccessor]() { return methodAccessor.IsPublic(); }, "access.function", "public");
888
889 SetEntityAttributeValue(
890 method, [&methodAccessor]() { return methodAccessor.IsProtected(); }, "access.function", "protected");
891
892 SetEntityAttributeValue(
893 method, [&methodAccessor]() { return methodAccessor.IsPrivate(); }, "access.function", "private");
894
895 SetEntityAttribute(
896 method, [&methodAccessor]() { return methodAccessor.IsFinal(); }, "final");
897
898 std::string ctorName = panda::panda_file::GetCtorName(fileLanguage_);
899 std::string cctorName = panda::panda_file::GetCctorName(fileLanguage_);
900
901 const bool isCtor = (methodNameRaw == ctorName);
902 const bool isCctor = (methodNameRaw == cctorName);
903
904 if (isCtor) {
905 method->metadata->SetAttribute("ctor");
906 method->name.replace(method->name.find(ctorName), ctorName.length(), "_ctor_");
907 } else if (isCctor) {
908 method->metadata->SetAttribute("cctor");
909 method->name.replace(method->name.find(cctorName), cctorName.length(), "_cctor_");
910 }
911 }
912
GetMetaData(pandasm::Record * record,const panda_file::File::EntityId & recordId) const913 void Disassembler::GetMetaData(pandasm::Record *record, const panda_file::File::EntityId &recordId) const
914 {
915 LOG(DEBUG, DISASSEMBLER) << "[getting metadata]\nrecord id: " << recordId << " (0x" << std::hex << recordId << ")";
916
917 if (record == nullptr) {
918 LOG(ERROR, DISASSEMBLER) << "> nullptr recieved, but record ptr expected!";
919
920 return;
921 }
922
923 SetEntityAttribute(
924 record, [this, recordId]() { return file_->IsExternal(recordId); }, "external");
925
926 auto external = file_->IsExternal(recordId);
927 if (!external) {
928 auto cda = panda_file::ClassDataAccessor {*file_, recordId};
929 SetEntityAttributeValue(
930 record, [&cda]() { return cda.IsPublic(); }, "access.record", "public");
931
932 SetEntityAttributeValue(
933 record, [&cda]() { return cda.IsProtected(); }, "access.record", "protected");
934
935 SetEntityAttributeValue(
936 record, [&cda]() { return cda.IsPrivate(); }, "access.record", "private");
937
938 SetEntityAttribute(
939 record, [&cda]() { return cda.IsFinal(); }, "final");
940 }
941 }
942
GetMetaData(pandasm::Field * field,const panda_file::File::EntityId & fieldId) const943 void Disassembler::GetMetaData(pandasm::Field *field, const panda_file::File::EntityId &fieldId) const
944 {
945 LOG(DEBUG, DISASSEMBLER) << "[getting metadata]\nfield id: " << fieldId << " (0x" << std::hex << fieldId << ")";
946
947 if (field == nullptr) {
948 LOG(ERROR, DISASSEMBLER) << "> nullptr recieved, but method ptr expected!";
949
950 return;
951 }
952
953 panda_file::FieldDataAccessor fieldAccessor(*file_, fieldId);
954
955 SetEntityAttribute(
956 field, [&fieldAccessor]() { return fieldAccessor.IsExternal(); }, "external");
957
958 SetEntityAttribute(
959 field, [&fieldAccessor]() { return fieldAccessor.IsStatic(); }, "static");
960
961 SetEntityAttributeValue(
962 field, [&fieldAccessor]() { return fieldAccessor.IsPublic(); }, "access.field", "public");
963
964 SetEntityAttributeValue(
965 field, [&fieldAccessor]() { return fieldAccessor.IsProtected(); }, "access.field", "protected");
966
967 SetEntityAttributeValue(
968 field, [&fieldAccessor]() { return fieldAccessor.IsPrivate(); }, "access.field", "private");
969
970 SetEntityAttribute(
971 field, [&fieldAccessor]() { return fieldAccessor.IsFinal(); }, "final");
972 }
973
AnnotationTagToString(const char tag) const974 std::string Disassembler::AnnotationTagToString(const char tag) const
975 {
976 static const std::unordered_map<char, std::string> TAG_TO_STRING = {{'1', "u1"},
977 {'2', "i8"},
978 {'3', "u8"},
979 {'4', "i16"},
980 {'5', "u16"},
981 {'6', "i32"},
982 {'7', "u32"},
983 {'8', "i64"},
984 {'9', "u64"},
985 {'A', "f32"},
986 {'B', "f64"},
987 {'C', "string"},
988 {'D', "record"},
989 {'E', "method"},
990 {'F', "enum"},
991 {'G', "annotation"},
992 {'J', "method_handle"},
993 {'H', "array"},
994 {'K', "u1[]"},
995 {'L', "i8[]"},
996 {'M', "u8[]"},
997 {'N', "i16[]"},
998 {'O', "u16[]"},
999 {'P', "i32[]"},
1000 {'Q', "u32[]"},
1001 {'R', "i64[]"},
1002 {'S', "u64[]"},
1003 {'T', "f32[]"},
1004 {'U', "f64[]"},
1005 {'V', "string[]"},
1006 {'W', "record[]"},
1007 {'X', "method[]"},
1008 {'Y', "enum[]"},
1009 {'Z', "annotation[]"},
1010 {'@', "method_handle[]"},
1011 {'*', "nullptr_string"}};
1012
1013 return TAG_TO_STRING.at(tag);
1014 }
1015
ScalarValueToString(const panda_file::ScalarValue & value,const std::string & type)1016 std::string Disassembler::ScalarValueToString(const panda_file::ScalarValue &value, const std::string &type)
1017 {
1018 std::stringstream ss;
1019
1020 if (type == "i8") {
1021 auto res = value.Get<int8_t>();
1022 ss << static_cast<int>(res);
1023 } else if (type == "u1" || type == "u8") {
1024 auto res = value.Get<uint8_t>();
1025 ss << static_cast<unsigned int>(res);
1026 } else if (type == "i16") {
1027 ss << value.Get<int16_t>();
1028 } else if (type == "u16") {
1029 ss << value.Get<uint16_t>();
1030 } else if (type == "i32") {
1031 ss << value.Get<int32_t>();
1032 } else if (type == "u32") {
1033 ss << value.Get<uint32_t>();
1034 } else if (type == "i64") {
1035 ss << value.Get<int64_t>();
1036 } else if (type == "u64") {
1037 ss << value.Get<uint64_t>();
1038 } else if (type == "f32") {
1039 ss << value.Get<float>();
1040 } else if (type == "f64") {
1041 ss << value.Get<double>();
1042 } else if (type == "string") {
1043 const auto id = value.Get<panda_file::File::EntityId>();
1044 ss << "\"" << StringDataToString(file_->GetStringData(id)) << "\"";
1045 } else if (type == "record") {
1046 const auto id = value.Get<panda_file::File::EntityId>();
1047 ss << GetFullRecordName(id);
1048 } else if (type == "method") {
1049 const auto id = value.Get<panda_file::File::EntityId>();
1050 AddMethodToTables(id);
1051 ss << GetMethodSignature(id);
1052 } else if (type == "enum") {
1053 const auto id = value.Get<panda_file::File::EntityId>();
1054 panda_file::FieldDataAccessor fieldAccessor(*file_, id);
1055 ss << GetFullRecordName(fieldAccessor.GetClassId()) << "."
1056 << StringDataToString(file_->GetStringData(fieldAccessor.GetNameId()));
1057 } else if (type == "annotation") {
1058 const auto id = value.Get<panda_file::File::EntityId>();
1059 ss << "id_" << id;
1060 } else if (type == "void") {
1061 return std::string();
1062 } else if (type == "method_handle") {
1063 } else if (type == "nullptr_string") {
1064 ss << static_cast<uint32_t>(0);
1065 }
1066
1067 return ss.str();
1068 }
1069
ArrayValueToString(const panda_file::ArrayValue & value,const std::string & type,const size_t idx)1070 std::string Disassembler::ArrayValueToString(const panda_file::ArrayValue &value, const std::string &type,
1071 const size_t idx)
1072 {
1073 std::stringstream ss;
1074
1075 if (type == "i8") {
1076 auto res = value.Get<int8_t>(idx);
1077 ss << static_cast<int>(res);
1078 } else if (type == "u1" || type == "u8") {
1079 auto res = value.Get<uint8_t>(idx);
1080 ss << static_cast<unsigned int>(res);
1081 } else if (type == "i16") {
1082 ss << value.Get<int16_t>(idx);
1083 } else if (type == "u16") {
1084 ss << value.Get<uint16_t>(idx);
1085 } else if (type == "i32") {
1086 ss << value.Get<int32_t>(idx);
1087 } else if (type == "u32") {
1088 ss << value.Get<uint32_t>(idx);
1089 } else if (type == "i64") {
1090 ss << value.Get<int64_t>(idx);
1091 } else if (type == "u64") {
1092 ss << value.Get<uint64_t>(idx);
1093 } else if (type == "f32") {
1094 ss << value.Get<float>(idx);
1095 } else if (type == "f64") {
1096 ss << value.Get<double>(idx);
1097 } else if (type == "string") {
1098 const auto id = value.Get<panda_file::File::EntityId>(idx);
1099 ss << '\"' << StringDataToString(file_->GetStringData(id)) << '\"';
1100 } else if (type == "record") {
1101 const auto id = value.Get<panda_file::File::EntityId>(idx);
1102 ss << GetFullRecordName(id);
1103 } else if (type == "method") {
1104 const auto id = value.Get<panda_file::File::EntityId>(idx);
1105 AddMethodToTables(id);
1106 ss << GetMethodSignature(id);
1107 } else if (type == "enum") {
1108 const auto id = value.Get<panda_file::File::EntityId>(idx);
1109 panda_file::FieldDataAccessor fieldAccessor(*file_, id);
1110 ss << GetFullRecordName(fieldAccessor.GetClassId()) << "."
1111 << StringDataToString(file_->GetStringData(fieldAccessor.GetNameId()));
1112 } else if (type == "annotation") {
1113 const auto id = value.Get<panda_file::File::EntityId>(idx);
1114 ss << "id_" << id;
1115 } else if (type == "method_handle") {
1116 } else if (type == "nullptr_string") {
1117 }
1118
1119 return ss.str();
1120 }
1121
GetFullMethodName(const panda_file::File::EntityId & methodId) const1122 std::string Disassembler::GetFullMethodName(const panda_file::File::EntityId &methodId) const
1123 {
1124 panda::panda_file::MethodDataAccessor methodAccessor(*file_, methodId);
1125
1126 const auto methodNameRaw = StringDataToString(file_->GetStringData(methodAccessor.GetNameId()));
1127
1128 std::string className = GetFullRecordName(methodAccessor.GetClassId());
1129 if (IsSystemType(className)) {
1130 className = "";
1131 } else {
1132 className += ".";
1133 }
1134
1135 return className + methodNameRaw;
1136 }
1137
GetMethodSignature(const panda_file::File::EntityId & methodId) const1138 std::string Disassembler::GetMethodSignature(const panda_file::File::EntityId &methodId) const
1139 {
1140 panda::panda_file::MethodDataAccessor methodAccessor(*file_, methodId);
1141
1142 pandasm::Function method(GetFullMethodName(methodId), fileLanguage_);
1143 GetParams(&method, methodAccessor.GetProtoId());
1144 GetMetaData(&method, methodId);
1145
1146 return pandasm::GetFunctionSignatureFromName(method.name, method.params);
1147 }
1148
GetFullRecordName(const panda_file::File::EntityId & classId) const1149 std::string Disassembler::GetFullRecordName(const panda_file::File::EntityId &classId) const
1150 {
1151 std::string name = StringDataToString(file_->GetStringData(classId));
1152
1153 auto type = pandasm::Type::FromDescriptor(name);
1154 type = pandasm::Type(type.GetComponentName(), type.GetRank());
1155
1156 return type.GetPandasmName();
1157 }
1158
GetRecordInfo(const panda_file::File::EntityId & recordId,RecordInfo * recordInfo) const1159 void Disassembler::GetRecordInfo(const panda_file::File::EntityId &recordId, RecordInfo *recordInfo) const
1160 {
1161 constexpr size_t DEFAULT_OFFSET_WIDTH = 4;
1162
1163 if (file_->IsExternal(recordId)) {
1164 return;
1165 }
1166
1167 panda_file::ClassDataAccessor classAccessor {*file_, recordId};
1168 std::stringstream ss;
1169
1170 ss << "offset: 0x" << std::setfill('0') << std::setw(DEFAULT_OFFSET_WIDTH) << std::hex << classAccessor.GetClassId()
1171 << ", size: 0x" << std::setfill('0') << std::setw(DEFAULT_OFFSET_WIDTH) << classAccessor.GetSize() << " ("
1172 << std::dec << classAccessor.GetSize() << ")";
1173
1174 recordInfo->recordInfo = ss.str();
1175 ss.str(std::string());
1176
1177 classAccessor.EnumerateFields([&](panda_file::FieldDataAccessor &fieldAccessor) -> void {
1178 ss << "offset: 0x" << std::setfill('0') << std::setw(DEFAULT_OFFSET_WIDTH) << std::hex
1179 << fieldAccessor.GetFieldId() << ", type: 0x" << fieldAccessor.GetType();
1180
1181 recordInfo->fieldsInfo.push_back(ss.str());
1182
1183 ss.str(std::string());
1184 });
1185 }
1186
GetMethodInfo(const panda_file::File::EntityId & methodId,MethodInfo * methodInfo) const1187 void Disassembler::GetMethodInfo(const panda_file::File::EntityId &methodId, MethodInfo *methodInfo) const
1188 {
1189 constexpr size_t DEFAULT_OFFSET_WIDTH = 4;
1190
1191 panda_file::MethodDataAccessor methodAccessor {*file_, methodId};
1192 std::stringstream ss;
1193
1194 ss << "offset: 0x" << std::setfill('0') << std::setw(DEFAULT_OFFSET_WIDTH) << std::hex
1195 << methodAccessor.GetMethodId();
1196
1197 if (methodAccessor.GetCodeId().has_value()) {
1198 ss << ", code offset: 0x" << std::setfill('0') << std::setw(DEFAULT_OFFSET_WIDTH) << std::hex
1199 << methodAccessor.GetCodeId().value();
1200
1201 GetInsInfo(methodAccessor, methodAccessor.GetCodeId().value(), methodInfo);
1202 } else {
1203 ss << ", <no code>";
1204 }
1205
1206 auto profileSize = methodAccessor.GetProfileSize();
1207 if (profileSize) {
1208 ss << ", profile size: " << profileSize.value();
1209 }
1210
1211 methodInfo->methodInfo = ss.str();
1212
1213 if (methodAccessor.GetCodeId()) {
1214 ASSERT(debugInfoExtractor_ != nullptr);
1215 methodInfo->lineNumberTable = debugInfoExtractor_->GetLineNumberTable(methodId);
1216 methodInfo->localVariableTable = debugInfoExtractor_->GetLocalVariableTable(methodId);
1217
1218 // Add information about parameters into the table
1219 panda_file::CodeDataAccessor codeda(*file_, methodAccessor.GetCodeId().value());
1220 auto argIdx = static_cast<int32_t>(codeda.GetNumVregs());
1221 uint32_t codeSize = codeda.GetCodeSize();
1222 for (const auto &info : debugInfoExtractor_->GetParameterInfo(methodId)) {
1223 panda_file::LocalVariableInfo argInfo {info.name, info.signature, "", argIdx++, 0, codeSize};
1224 methodInfo->localVariableTable.emplace_back(argInfo);
1225 }
1226 }
1227 }
1228
Serialize(const std::string & name,const pandasm::LiteralArray & litArray,std::ostream & os) const1229 void Disassembler::Serialize(const std::string &name, const pandasm::LiteralArray &litArray, std::ostream &os) const
1230 {
1231 if (litArray.literals.empty()) {
1232 return;
1233 }
1234
1235 bool isConst = litArray.literals[0].IsArray();
1236
1237 std::stringstream specifiers {};
1238
1239 if (isConst) {
1240 specifiers << LiteralTagToString(litArray.literals[0].tag) << " " << litArray.literals.size() << " ";
1241 }
1242
1243 os << ".array array_" << name << " " << specifiers.str() << "{";
1244
1245 SerializeValues(litArray, isConst, os);
1246
1247 os << "}\n";
1248 }
1249
LiteralTagToString(const panda_file::LiteralTag & tag) const1250 std::string Disassembler::LiteralTagToString(const panda_file::LiteralTag &tag) const
1251 {
1252 switch (tag) {
1253 case panda_file::LiteralTag::BOOL:
1254 case panda_file::LiteralTag::ARRAY_U1:
1255 return "u1";
1256 case panda_file::LiteralTag::ARRAY_U8:
1257 return "u8";
1258 case panda_file::LiteralTag::ARRAY_I8:
1259 return "i8";
1260 case panda_file::LiteralTag::ARRAY_U16:
1261 return "u16";
1262 case panda_file::LiteralTag::ARRAY_I16:
1263 return "i16";
1264 case panda_file::LiteralTag::ARRAY_U32:
1265 return "u32";
1266 case panda_file::LiteralTag::INTEGER:
1267 case panda_file::LiteralTag::ARRAY_I32:
1268 return "i32";
1269 case panda_file::LiteralTag::ARRAY_U64:
1270 return "u64";
1271 case panda_file::LiteralTag::BIGINT:
1272 case panda_file::LiteralTag::ARRAY_I64:
1273 return "i64";
1274 case panda_file::LiteralTag::FLOAT:
1275 case panda_file::LiteralTag::ARRAY_F32:
1276 return "f32";
1277 case panda_file::LiteralTag::DOUBLE:
1278 case panda_file::LiteralTag::ARRAY_F64:
1279 return "f64";
1280 case panda_file::LiteralTag::STRING:
1281 case panda_file::LiteralTag::ARRAY_STRING:
1282 return pandasm::Type::FromDescriptor(panda_file::GetStringClassDescriptor(fileLanguage_)).GetPandasmName();
1283 case panda_file::LiteralTag::ACCESSOR:
1284 return "accessor";
1285 case panda_file::LiteralTag::NULLVALUE:
1286 return "nullvalue";
1287 case panda_file::LiteralTag::METHODAFFILIATE:
1288 return "method_affiliate";
1289 case panda_file::LiteralTag::METHOD:
1290 return "method";
1291 case panda_file::LiteralTag::GENERATORMETHOD:
1292 return "generator_method";
1293 default:
1294 LOG(ERROR, DISASSEMBLER) << "Unsupported literal with tag 0x" << std::hex << static_cast<uint32_t>(tag);
1295 UNREACHABLE();
1296 }
1297 }
1298
LiteralValueToString(const pandasm::LiteralArray::Literal & lit) const1299 std::string Disassembler::LiteralValueToString(const pandasm::LiteralArray::Literal &lit) const
1300 {
1301 if (lit.IsBoolValue()) {
1302 std::stringstream res {};
1303 res << (std::get<bool>(lit.value));
1304 return res.str();
1305 }
1306
1307 if (lit.IsByteValue()) {
1308 return LiteralIntegralValueToString<uint8_t>(lit);
1309 }
1310
1311 if (lit.IsShortValue()) {
1312 return LiteralIntegralValueToString<uint16_t>(lit);
1313 }
1314
1315 if (lit.IsIntegerValue()) {
1316 return LiteralIntegralValueToString<uint32_t>(lit);
1317 }
1318
1319 if (lit.IsLongValue()) {
1320 return LiteralIntegralValueToString<uint64_t>(lit);
1321 }
1322
1323 if (lit.IsDoubleValue()) {
1324 std::stringstream res {};
1325 res << std::get<double>(lit.value);
1326 return res.str();
1327 }
1328
1329 if (lit.IsFloatValue()) {
1330 std::stringstream res {};
1331 res << std::get<float>(lit.value);
1332 return res.str();
1333 }
1334
1335 if (lit.IsStringValue()) {
1336 std::stringstream res {};
1337 res << "\"" << std::get<std::string>(lit.value) << "\"";
1338 return res.str();
1339 }
1340
1341 UNREACHABLE();
1342 }
1343
SerializeValues(const pandasm::LiteralArray & litArray,const bool isConst,std::ostream & os) const1344 void Disassembler::SerializeValues(const pandasm::LiteralArray &litArray, const bool isConst, std::ostream &os) const
1345 {
1346 std::string separator = (isConst) ? (" ") : ("\n");
1347
1348 os << separator;
1349
1350 if (isConst) {
1351 for (const auto &l : litArray.literals) {
1352 os << LiteralValueToString(l) << separator;
1353 }
1354 } else {
1355 for (const auto &l : litArray.literals) {
1356 os << "\t" << LiteralTagToString(l.tag) << " " << LiteralValueToString(l) << separator;
1357 }
1358 }
1359 }
1360
Serialize(const pandasm::Record & record,std::ostream & os,bool printInformation) const1361 void Disassembler::Serialize(const pandasm::Record &record, std::ostream &os, bool printInformation) const
1362 {
1363 if (IsSystemType(record.name)) {
1364 return;
1365 }
1366
1367 os << ".record " << record.name;
1368
1369 const auto recordIter = progAnn_.recordAnnotations.find(record.name);
1370 const bool recordInTable = recordIter != progAnn_.recordAnnotations.end();
1371 if (recordInTable) {
1372 Serialize(*record.metadata, recordIter->second.annList, os);
1373 } else {
1374 Serialize(*record.metadata, {}, os);
1375 }
1376
1377 if (record.metadata->IsForeign() && record.fieldList.empty()) {
1378 os << "\n\n";
1379 return;
1380 }
1381
1382 os << " {";
1383
1384 if (printInformation && progInfo_.recordsInfo.find(record.name) != progInfo_.recordsInfo.end()) {
1385 os << " # " << progInfo_.recordsInfo.at(record.name).recordInfo << "\n";
1386 SerializeFields(record, os, true);
1387 } else {
1388 os << "\n";
1389 SerializeFields(record, os, false);
1390 }
1391
1392 os << "}\n\n";
1393 }
1394
SerializeUnionFields(const pandasm::Record & record,std::ostream & os,bool printInformation) const1395 void Disassembler::SerializeUnionFields(const pandasm::Record &record, std::ostream &os, bool printInformation) const
1396 {
1397 if (printInformation && progInfo_.recordsInfo.find(record.name) != progInfo_.recordsInfo.end()) {
1398 os << " # " << progInfo_.recordsInfo.at(record.name).recordInfo << "\n";
1399 SerializeFields(record, os, true, true);
1400 } else {
1401 SerializeFields(record, os, false, true);
1402 }
1403 os << "\n";
1404 }
1405
SerializeFields(const pandasm::Record & record,std::ostream & os,bool printInformation,bool isUnion) const1406 void Disassembler::SerializeFields(const pandasm::Record &record, std::ostream &os, bool printInformation,
1407 bool isUnion) const
1408 {
1409 constexpr size_t INFO_OFFSET = 80;
1410
1411 const auto recordIter = progAnn_.recordAnnotations.find(record.name);
1412 const bool recordInTable = recordIter != progAnn_.recordAnnotations.end();
1413
1414 const auto recInf = (printInformation) ? (progInfo_.recordsInfo.at(record.name)) : (RecordInfo {});
1415
1416 size_t fieldIdx = 0;
1417
1418 std::stringstream ss;
1419 for (const auto &f : record.fieldList) {
1420 if (isUnion) {
1421 ss << ".union_field ";
1422 } else {
1423 ss << "\t";
1424 }
1425 ss << f.type.GetPandasmName() << " " << f.name;
1426 if (!isUnion) {
1427 if (recordInTable) {
1428 const auto fieldIter = recordIter->second.fieldAnnotations.find(f.name);
1429 if (fieldIter != recordIter->second.fieldAnnotations.end()) {
1430 Serialize(*f.metadata, fieldIter->second, ss);
1431 } else {
1432 Serialize(*f.metadata, {}, ss);
1433 }
1434 } else {
1435 Serialize(*f.metadata, {}, ss);
1436 }
1437 }
1438
1439 if (printInformation && !recInf.fieldsInfo.empty()) {
1440 os << std::setw(INFO_OFFSET) << std::left << ss.str() << " # " << recInf.fieldsInfo.at(fieldIdx) << "\n";
1441 } else {
1442 os << ss.str() << "\n";
1443 }
1444
1445 ss.str(std::string());
1446 ss.clear();
1447
1448 fieldIdx++;
1449 }
1450 }
1451
Serialize(const pandasm::Function::CatchBlock & catchBlock,std::ostream & os) const1452 void Disassembler::Serialize(const pandasm::Function::CatchBlock &catchBlock, std::ostream &os) const
1453 {
1454 if (catchBlock.exceptionRecord.empty()) {
1455 os << ".catchall ";
1456 } else {
1457 os << ".catch " << catchBlock.exceptionRecord << ", ";
1458 }
1459
1460 os << catchBlock.tryBeginLabel << ", " << catchBlock.tryEndLabel << ", " << catchBlock.catchBeginLabel;
1461
1462 if (!catchBlock.catchEndLabel.empty()) {
1463 os << ", " << catchBlock.catchEndLabel;
1464 }
1465 }
1466
Serialize(const pandasm::ItemMetadata & meta,const AnnotationList & annList,std::ostream & os) const1467 void Disassembler::Serialize(const pandasm::ItemMetadata &meta, const AnnotationList &annList, std::ostream &os) const
1468 {
1469 auto boolAttributes = meta.GetBoolAttributes();
1470 auto attributes = meta.GetAttributes();
1471 if (boolAttributes.empty() && attributes.empty() && annList.empty()) {
1472 return;
1473 }
1474
1475 os << " <";
1476
1477 size_t size = boolAttributes.size();
1478 size_t idx = 0;
1479 for (const auto &attr : boolAttributes) {
1480 os << attr;
1481 ++idx;
1482
1483 if (!attributes.empty() || !annList.empty() || idx < size) {
1484 os << ", ";
1485 }
1486 }
1487
1488 size = attributes.size();
1489 idx = 0;
1490 for (const auto &[key, values] : attributes) {
1491 for (size_t i = 0; i < values.size(); i++) {
1492 os << key << "=" << values[i];
1493
1494 if (i < values.size() - 1) {
1495 os << ", ";
1496 }
1497 }
1498
1499 ++idx;
1500
1501 if (!annList.empty() || idx < size) {
1502 os << ", ";
1503 }
1504 }
1505
1506 size = annList.size();
1507 idx = 0;
1508 for (const auto &[key, value] : annList) {
1509 os << key << "=" << value;
1510
1511 ++idx;
1512
1513 if (idx < size) {
1514 os << ", ";
1515 }
1516 }
1517
1518 os << ">";
1519 }
1520
SerializeLineNumberTable(const panda_file::LineNumberTable & lineNumberTable,std::ostream & os) const1521 void Disassembler::SerializeLineNumberTable(const panda_file::LineNumberTable &lineNumberTable, std::ostream &os) const
1522 {
1523 if (lineNumberTable.empty()) {
1524 return;
1525 }
1526
1527 os << "\n# LINE_NUMBER_TABLE:\n";
1528 for (const auto &lineInfo : lineNumberTable) {
1529 os << "#\tline " << lineInfo.line << ": " << lineInfo.offset << "\n";
1530 }
1531 }
1532
SerializeLocalVariableTable(const panda_file::LocalVariableTable & localVariableTable,const pandasm::Function & method,std::ostream & os) const1533 void Disassembler::SerializeLocalVariableTable(const panda_file::LocalVariableTable &localVariableTable,
1534 const pandasm::Function &method, std::ostream &os) const
1535 {
1536 if (localVariableTable.empty()) {
1537 return;
1538 }
1539
1540 os << "\n# LOCAL_VARIABLE_TABLE:\n";
1541 os << "#\t Start End Register Name Signature\n";
1542 const int startWidth = 5;
1543 const int endWidth = 4;
1544 const int regWidth = 8;
1545 const int nameWidth = 14;
1546 for (const auto &variableInfo : localVariableTable) {
1547 std::ostringstream regStream;
1548 regStream << variableInfo.regNumber << '(';
1549 if (variableInfo.regNumber < 0) {
1550 regStream << "acc";
1551 } else {
1552 uint32_t vreg = variableInfo.regNumber;
1553 uint32_t firstArgReg = method.GetTotalRegs();
1554 if (vreg < firstArgReg) {
1555 regStream << 'v' << vreg;
1556 } else {
1557 regStream << 'a' << vreg - firstArgReg;
1558 }
1559 }
1560 regStream << ')';
1561
1562 os << "#\t " << std::setw(startWidth) << std::right << variableInfo.startOffset << " ";
1563 os << std::setw(endWidth) << std::right << variableInfo.endOffset << " ";
1564 os << std::setw(regWidth) << std::right << regStream.str() << " ";
1565 os << std::setw(nameWidth) << std::right << variableInfo.name << " " << variableInfo.type;
1566 if (!variableInfo.typeSignature.empty() && variableInfo.typeSignature != variableInfo.type) {
1567 os << " (" << variableInfo.typeSignature << ")";
1568 }
1569 os << "\n";
1570 }
1571 }
1572
SerializeLanguage(std::ostream & os) const1573 void Disassembler::SerializeLanguage(std::ostream &os) const
1574 {
1575 os << ".language " << panda::panda_file::LanguageToString(fileLanguage_) << "\n\n";
1576 }
1577
SerializeFilename(std::ostream & os) const1578 void Disassembler::SerializeFilename(std::ostream &os) const
1579 {
1580 if (file_ == nullptr || file_->GetFilename().empty()) {
1581 return;
1582 }
1583
1584 os << "# source binary: " << file_->GetFilename() << "\n\n";
1585 }
1586
SerializeLitArrays(std::ostream & os,bool addSeparators) const1587 void Disassembler::SerializeLitArrays(std::ostream &os, bool addSeparators) const
1588 {
1589 LOG(DEBUG, DISASSEMBLER) << "[serializing literals]";
1590
1591 if (prog_.literalarrayTable.empty()) {
1592 return;
1593 }
1594
1595 if (addSeparators) {
1596 os << "# ====================\n"
1597 "# LITERALS\n\n";
1598 }
1599
1600 for (const auto &pair : prog_.literalarrayTable) {
1601 Serialize(pair.first, pair.second, os);
1602 }
1603
1604 os << "\n";
1605 }
1606
SerializeRecords(std::ostream & os,bool addSeparators,bool printInformation) const1607 void Disassembler::SerializeRecords(std::ostream &os, bool addSeparators, bool printInformation) const
1608 {
1609 LOG(DEBUG, DISASSEMBLER) << "[serializing records]";
1610
1611 if (prog_.recordTable.empty()) {
1612 return;
1613 }
1614
1615 if (addSeparators) {
1616 os << "# ====================\n"
1617 "# RECORDS\n\n";
1618 }
1619
1620 for (const auto &r : prog_.recordTable) {
1621 if (!panda_file::IsDummyClassName(r.first)) {
1622 Serialize(r.second, os, printInformation);
1623 } else {
1624 SerializeUnionFields(r.second, os, printInformation);
1625 }
1626 }
1627 }
1628
SerializeMethods(std::ostream & os,bool addSeparators,bool printInformation) const1629 void Disassembler::SerializeMethods(std::ostream &os, bool addSeparators, bool printInformation) const
1630 {
1631 LOG(DEBUG, DISASSEMBLER) << "[serializing methods]";
1632
1633 if (prog_.functionTable.empty()) {
1634 return;
1635 }
1636
1637 if (addSeparators) {
1638 os << "# ====================\n"
1639 "# METHODS\n\n";
1640 }
1641
1642 for (const auto &m : prog_.functionTable) {
1643 Serialize(m.second, os, printInformation);
1644 }
1645 }
1646
BytecodeOpcodeToPandasmOpcode(uint8_t o) const1647 pandasm::Opcode Disassembler::BytecodeOpcodeToPandasmOpcode(uint8_t o) const
1648 {
1649 return BytecodeOpcodeToPandasmOpcode(BytecodeInstruction::Opcode(o));
1650 }
1651
IDToString(BytecodeInstruction bcIns,panda_file::File::EntityId methodId) const1652 std::string Disassembler::IDToString(BytecodeInstruction bcIns, panda_file::File::EntityId methodId) const
1653 {
1654 std::stringstream name;
1655
1656 if (bcIns.HasFlag(BytecodeInstruction::Flags::TYPE_ID)) {
1657 auto idx = bcIns.GetId().AsIndex();
1658 auto id = file_->ResolveClassIndex(methodId, idx);
1659 auto type = pandasm::Type::FromDescriptor(StringDataToString(file_->GetStringData(id)));
1660
1661 name.str("");
1662 name << type.GetPandasmName();
1663 } else if (bcIns.HasFlag(BytecodeInstruction::Flags::METHOD_ID)) {
1664 auto idx = bcIns.GetId().AsIndex();
1665 auto id = file_->ResolveMethodIndex(methodId, idx);
1666
1667 name << GetMethodSignature(id);
1668 } else if (bcIns.HasFlag(BytecodeInstruction::Flags::STRING_ID)) {
1669 name << '\"';
1670
1671 if (skipStrings_ || quiet_) {
1672 name << std::hex << "0x" << bcIns.GetId().AsFileId();
1673 } else {
1674 name << StringDataToString(file_->GetStringData(bcIns.GetId().AsFileId()));
1675 }
1676
1677 name << '\"';
1678 } else if (bcIns.HasFlag(BytecodeInstruction::Flags::FIELD_ID)) {
1679 auto idx = bcIns.GetId().AsIndex();
1680 auto id = file_->ResolveFieldIndex(methodId, idx);
1681 panda_file::FieldDataAccessor fieldAccessor(*file_, id);
1682
1683 auto recordName = GetFullRecordName(fieldAccessor.GetClassId());
1684 if (!panda_file::IsDummyClassName(recordName)) {
1685 name << recordName;
1686 name << '.';
1687 }
1688 name << StringDataToString(file_->GetStringData(fieldAccessor.GetNameId()));
1689 } else if (bcIns.HasFlag(BytecodeInstruction::Flags::LITERALARRAY_ID)) {
1690 auto index = bcIns.GetId().AsIndex();
1691 name << "array_" << index;
1692 }
1693
1694 return name.str();
1695 }
1696
GetRecordLanguage(panda_file::File::EntityId classId) const1697 panda::panda_file::SourceLang Disassembler::GetRecordLanguage(panda_file::File::EntityId classId) const
1698 {
1699 if (file_->IsExternal(classId)) {
1700 return panda::panda_file::SourceLang::PANDA_ASSEMBLY;
1701 }
1702
1703 panda_file::ClassDataAccessor cda(*file_, classId);
1704 return cda.GetSourceLang().value_or(panda_file::SourceLang::PANDA_ASSEMBLY);
1705 }
1706
TranslateImmToLabel(pandasm::Ins * paIns,LabelTable * labelTable,const uint8_t * insArr,BytecodeInstruction bcIns,BytecodeInstruction bcInsLast,panda_file::File::EntityId codeId)1707 static void TranslateImmToLabel(pandasm::Ins *paIns, LabelTable *labelTable, const uint8_t *insArr,
1708 BytecodeInstruction bcIns, BytecodeInstruction bcInsLast,
1709 panda_file::File::EntityId codeId)
1710 {
1711 const int32_t jmpOffset = std::get<int64_t>(paIns->imms.at(0));
1712 const auto bcInsDest = bcIns.JumpTo(jmpOffset);
1713 if (bcInsLast.GetAddress() > bcInsDest.GetAddress()) {
1714 size_t idx = GetBytecodeInstructionNumber(BytecodeInstruction(insArr), bcInsDest);
1715 if (idx != std::numeric_limits<size_t>::max()) {
1716 if (labelTable->find(idx) == labelTable->end()) {
1717 std::stringstream ss {};
1718 ss << "jump_label_" << labelTable->size();
1719 (*labelTable)[idx] = ss.str();
1720 }
1721
1722 paIns->imms.clear();
1723 paIns->ids.push_back(labelTable->at(idx));
1724 } else {
1725 LOG(ERROR, DISASSEMBLER) << "> error encountered at " << codeId << " (0x" << std::hex << codeId
1726 << "). incorrect instruction at offset: 0x" << (bcIns.GetAddress() - insArr)
1727 << ": invalid jump offset 0x" << jmpOffset
1728 << " - jumping in the middle of another instruction!";
1729 }
1730 } else {
1731 LOG(ERROR, DISASSEMBLER) << "> error encountered at " << codeId << " (0x" << std::hex << codeId
1732 << "). incorrect instruction at offset: 0x" << (bcIns.GetAddress() - insArr)
1733 << ": invalid jump offset 0x" << jmpOffset << " - jumping out of bounds!";
1734 }
1735 }
1736
CollectExternalFields(const panda_file::FieldDataAccessor & fieldAccessor)1737 void Disassembler::CollectExternalFields(const panda_file::FieldDataAccessor &fieldAccessor)
1738 {
1739 auto recordName = GetFullRecordName(fieldAccessor.GetClassId());
1740
1741 pandasm::Field field(fileLanguage_);
1742 GetField(field, fieldAccessor);
1743
1744 auto &fieldList = externalFieldTable_[recordName];
1745 auto retField = std::find_if(fieldList.begin(), fieldList.end(),
1746 [&field](pandasm::Field &fieldFromList) { return field.name == fieldFromList.name; });
1747 if (retField == fieldList.end()) {
1748 fieldList.push_back(std::move(field));
1749 }
1750 }
1751
GetInstructions(pandasm::Function * method,panda_file::File::EntityId methodId,panda_file::File::EntityId codeId)1752 IdList Disassembler::GetInstructions(pandasm::Function *method, panda_file::File::EntityId methodId,
1753 panda_file::File::EntityId codeId)
1754 {
1755 panda_file::CodeDataAccessor codeAccessor(*file_, codeId);
1756
1757 const auto insSz = codeAccessor.GetCodeSize();
1758 const auto insArr = codeAccessor.GetInstructions();
1759
1760 method->regsNum = codeAccessor.GetNumVregs();
1761
1762 auto bcIns = BytecodeInstruction(insArr);
1763 auto from = bcIns.GetAddress();
1764 const auto bcInsLast = bcIns.JumpTo(insSz);
1765
1766 LabelTable labelTable = GetExceptions(method, methodId, codeId);
1767
1768 IdList unknownExternalMethods {};
1769
1770 while (bcIns.GetAddress() != bcInsLast.GetAddress()) {
1771 if (bcIns.GetAddress() > bcInsLast.GetAddress()) {
1772 LOG(ERROR, DISASSEMBLER) << "> error encountered at " << codeId << " (0x" << std::hex << codeId
1773 << "). bytecode instructions sequence corrupted for method " << method->name
1774 << "! went out of bounds";
1775
1776 break;
1777 }
1778
1779 if (bcIns.HasFlag(BytecodeInstruction::Flags::FIELD_ID)) {
1780 auto idx = bcIns.GetId().AsIndex();
1781 auto id = file_->ResolveFieldIndex(methodId, idx);
1782 panda_file::FieldDataAccessor fieldAccessor(*file_, id);
1783
1784 if (fieldAccessor.IsExternal()) {
1785 CollectExternalFields(fieldAccessor);
1786 }
1787 }
1788
1789 auto paIns = BytecodeInstructionToPandasmInstruction(bcIns, methodId);
1790 paIns.insDebug.boundLeft =
1791 bcIns.GetAddress() - from; // It is used to produce a line table during method serialization
1792 if (paIns.IsJump()) {
1793 TranslateImmToLabel(&paIns, &labelTable, insArr, bcIns, bcInsLast, codeId);
1794 }
1795
1796 // check if method id is unknown external method. if so, emplace it in table
1797 if (bcIns.HasFlag(BytecodeInstruction::Flags::METHOD_ID)) {
1798 const auto argMethodIdx = bcIns.GetId().AsIndex();
1799 const auto argMethodId = file_->ResolveMethodIndex(methodId, argMethodIdx);
1800
1801 const auto argMethodSignature = GetMethodSignature(argMethodId);
1802
1803 const bool isPresent = prog_.functionTable.find(argMethodSignature) != prog_.functionTable.cend();
1804 const bool isExternal = file_->IsExternal(argMethodId);
1805 if (isExternal && !isPresent) {
1806 unknownExternalMethods.push_back(argMethodId);
1807 }
1808 }
1809
1810 method->ins.push_back(paIns);
1811 bcIns = bcIns.GetNext();
1812 }
1813
1814 for (const auto &pair : labelTable) {
1815 method->ins[pair.first].label = pair.second;
1816 method->ins[pair.first].setLabel = true;
1817 }
1818
1819 return unknownExternalMethods;
1820 }
1821
1822 } // namespace panda::disasm
1823