1 /*
2 * Copyright (c) 2021-2025 Huawei Device Co., Ltd.
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at
6 *
7 * http://www.apache.org/licenses/LICENSE-2.0
8 *
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
14 */
15
16 #include "disassembler.h"
17 #include "class_data_accessor.h"
18 #include "field_data_accessor.h"
19 #include "libpandafile/type_helper.h"
20 #include "literal_data_accessor.h"
21 #include "mangling.h"
22 #include "utils/logger.h"
23
24 #include <cstdint>
25 #include <iomanip>
26 #include <charconv>
27
28 #include "get_language_specific_metadata.inc"
29
30 namespace ark::disasm {
31
Disassemble(std::string_view filenameIn,const bool quiet,const bool skipStrings)32 void Disassembler::Disassemble(std::string_view filenameIn, const bool quiet, const bool skipStrings)
33 {
34 auto file = panda_file::File::Open(filenameIn);
35 if (file == nullptr) {
36 LOG(FATAL, DISASSEMBLER) << "> unable to open specified pandafile: <" << filenameIn << ">";
37 }
38
39 Disassemble(file, quiet, skipStrings);
40 }
41
Disassemble(const panda_file::File & file,const bool quiet,const bool skipStrings)42 void Disassembler::Disassemble(const panda_file::File &file, const bool quiet, const bool skipStrings)
43 {
44 SetFile(file);
45 DisassembleImpl(quiet, skipStrings);
46 }
47
Disassemble(std::unique_ptr<const panda_file::File> & file,const bool quiet,const bool skipStrings)48 void Disassembler::Disassemble(std::unique_ptr<const panda_file::File> &file, const bool quiet, const bool skipStrings)
49 {
50 SetFile(file);
51 DisassembleImpl(quiet, skipStrings);
52 }
53
DisassembleImpl(const bool quiet,const bool skipStrings)54 void Disassembler::DisassembleImpl(const bool quiet, const bool skipStrings)
55 {
56 prog_ = pandasm::Program {};
57
58 recordNameToId_.clear();
59 methodStaticNameToId_.clear();
60 methodInstanceNameToId_.clear();
61
62 skipStrings_ = skipStrings;
63 quiet_ = quiet;
64
65 progInfo_ = ProgInfo {};
66
67 progAnn_ = ProgAnnotations {};
68
69 GetLiteralArrays();
70 GetRecords();
71
72 AddExternalFieldsToRecords();
73 GetLanguageSpecificMetadata();
74 }
75
SetFile(std::unique_ptr<const panda_file::File> & file)76 void Disassembler::SetFile(std::unique_ptr<const panda_file::File> &file)
77 {
78 fileHolder_.swap(file);
79 file_ = fileHolder_.get();
80 }
81
SetFile(const panda_file::File & file)82 void Disassembler::SetFile(const panda_file::File &file)
83 {
84 fileHolder_.reset();
85 file_ = &file;
86 }
87
SetProfile(std::string_view fname)88 void Disassembler::SetProfile(std::string_view fname)
89 {
90 std::ifstream stm(fname.data(), std::ios::binary);
91 if (!stm.is_open()) {
92 LOG(FATAL, DISASSEMBLER) << "Cannot open profile file";
93 }
94
95 auto res = profiling::ReadProfile(stm, fileLanguage_);
96 if (!res) {
97 LOG(FATAL, DISASSEMBLER) << "Failed to deserialize: " << res.Error();
98 }
99 profile_ = res.Value();
100 }
101
GetInsInfo(panda_file::MethodDataAccessor & mda,const panda_file::File::EntityId & codeId,MethodInfo * methodInfo) const102 void Disassembler::GetInsInfo(panda_file::MethodDataAccessor &mda, const panda_file::File::EntityId &codeId,
103 MethodInfo *methodInfo /* out */) const
104 {
105 const static size_t FORMAT_WIDTH = 20;
106 const static size_t INSTRUCTION_WIDTH = 2;
107
108 panda_file::CodeDataAccessor codeAccessor(*file_, codeId);
109
110 std::string methodName = mda.GetFullName();
111 auto prof = profiling::INVALID_PROFILE;
112 if (profile_ != profiling::INVALID_PROFILE) {
113 prof = profiling::FindMethodInProfile(profile_, fileLanguage_, methodName);
114 }
115
116 auto insSz = codeAccessor.GetCodeSize();
117 auto insArr = codeAccessor.GetInstructions();
118
119 auto bcIns = BytecodeInstruction(insArr);
120 auto bcInsLast = bcIns.JumpTo(insSz);
121
122 while (bcIns.GetAddress() != bcInsLast.GetAddress()) {
123 std::stringstream ss;
124
125 uintptr_t bc = bcIns.GetAddress() - BytecodeInstruction(insArr).GetAddress();
126 ss << "offset: 0x" << std::setfill('0') << std::setw(4U) << std::hex << bc;
127 ss << ", " << std::setfill('.');
128
129 BytecodeInstruction::Format format = bcIns.GetFormat();
130
131 auto formatStr = std::string("[") + BytecodeInstruction::GetFormatString(format) + ']';
132 ss << std::setw(FORMAT_WIDTH) << std::left << formatStr;
133
134 ss << "[";
135
136 const uint8_t *pc = bcIns.GetAddress();
137 const size_t sz = bcIns.GetSize();
138
139 for (size_t i = 0; i < sz; i++) {
140 ss << "0x" << std::setw(INSTRUCTION_WIDTH) << std::setfill('0') << std::right << std::hex
141 << static_cast<int>(pc[i]); // NOLINT(cppcoreguidelines-pro-bounds-pointer-arithmetic)
142
143 if (i != sz - 1) {
144 ss << " ";
145 }
146 }
147
148 ss << "]";
149
150 if (profile_ != profiling::INVALID_PROFILE && prof != profiling::INVALID_PROFILE) {
151 auto profId = bcIns.GetProfileId();
152 if (profId != -1) {
153 ss << ", Profile: ";
154 profiling::DumpProfile(prof, fileLanguage_, &bcIns, ss);
155 }
156 }
157
158 methodInfo->instructionsInfo.push_back(ss.str());
159
160 bcIns = bcIns.GetNext();
161 }
162 }
163
CollectInfo()164 void Disassembler::CollectInfo()
165 {
166 LOG(DEBUG, DISASSEMBLER) << "\n[getting program info]\n";
167
168 debugInfoExtractor_ = std::make_unique<panda_file::DebugInfoExtractor>(file_);
169
170 for (const auto &pair : recordNameToId_) {
171 GetRecordInfo(pair.second, &progInfo_.recordsInfo[pair.first]);
172 }
173
174 for (const auto &pair : methodStaticNameToId_) {
175 GetMethodInfo(pair.second, &progInfo_.methodsStaticInfo[pair.first]);
176 }
177 for (const auto &pair : methodInstanceNameToId_) {
178 GetMethodInfo(pair.second, &progInfo_.methodsInstanceInfo[pair.first]);
179 }
180
181 AddExternalFieldsInfoToRecords();
182 }
183
Serialize(std::ostream & os,bool addSeparators,bool printInformation) const184 void Disassembler::Serialize(std::ostream &os, bool addSeparators, bool printInformation) const
185 {
186 if (os.bad()) {
187 LOG(DEBUG, DISASSEMBLER) << "> serialization failed. os bad\n";
188
189 return;
190 }
191
192 SerializeFilename(os);
193 SerializeLanguage(os);
194 SerializeLitArrays(os, addSeparators);
195 SerializeRecords(os, addSeparators, printInformation);
196 SerializeMethods(os, addSeparators, printInformation);
197 }
198
SerializePrintStartInfo(const pandasm::Function & method,std::ostringstream & headerSs) const199 void Disassembler::SerializePrintStartInfo(const pandasm::Function &method, std::ostringstream &headerSs) const
200 {
201 headerSs << ".function " << method.returnType.GetPandasmName() << " " << method.name << "(";
202
203 if (!method.params.empty()) {
204 headerSs << method.params[0].type.GetPandasmName() << " a0";
205
206 for (size_t i = 1; i < method.params.size(); i++) {
207 headerSs << ", " << method.params[i].type.GetPandasmName() << " a" << (size_t)i;
208 }
209 }
210 headerSs << ")";
211 }
212
SerializeCheckEnd(const pandasm::Function & method,std::ostream & os,bool printMethodInfo,const MethodInfo * & methodInfo) const213 void Disassembler::SerializeCheckEnd(const pandasm::Function &method, std::ostream &os, bool printMethodInfo,
214 const MethodInfo *&methodInfo) const
215 {
216 if (!method.catchBlocks.empty()) {
217 os << "\n";
218
219 for (const auto &catchBlock : method.catchBlocks) {
220 Serialize(catchBlock, os);
221 os << "\n";
222 }
223 }
224
225 if (printMethodInfo) {
226 ASSERT(methodInfo != nullptr);
227 SerializeLineNumberTable(methodInfo->lineNumberTable, os);
228 SerializeLocalVariableTable(methodInfo->localVariableTable, method, os);
229 }
230
231 os << "}\n\n";
232 }
233
SerializeIfPrintMethodInfo(const pandasm::Function & method,bool printMethodInfo,std::ostringstream & headerSs,const MethodInfo * & methodInfo,std::map<std::string,ark::disasm::MethodInfo>::const_iterator & methodInfoIt) const234 size_t Disassembler::SerializeIfPrintMethodInfo(
235 const pandasm::Function &method, bool printMethodInfo, std::ostringstream &headerSs, const MethodInfo *&methodInfo,
236 std::map<std::string, ark::disasm::MethodInfo>::const_iterator &methodInfoIt) const
237 {
238 size_t width = 0;
239 if (printMethodInfo) {
240 methodInfo = &methodInfoIt->second;
241
242 for (const auto &i : method.ins) {
243 if (i.ToString().size() > width) {
244 width = i.ToString().size();
245 }
246 }
247
248 headerSs << " # " << methodInfo->methodInfo << "\n# CODE:";
249 }
250
251 headerSs << "\n";
252 return width;
253 }
254
255 // CC-OFFNXT(huge_method) solid logic
Serialize(const pandasm::Function & method,std::ostream & os,bool printInformation,panda_file::LineNumberTable * lineTable) const256 void Disassembler::Serialize(const pandasm::Function &method, std::ostream &os, bool printInformation,
257 panda_file::LineNumberTable *lineTable) const
258 {
259 std::ostringstream headerSs;
260 SerializePrintStartInfo(method, headerSs);
261 const std::string signature = pandasm::GetFunctionSignatureFromName(method.name, method.params);
262 const auto methodIter = progAnn_.methodAnnotations.find(signature);
263 if (methodIter != progAnn_.methodAnnotations.end()) {
264 Serialize(*method.metadata, methodIter->second, headerSs);
265 } else {
266 Serialize(*method.metadata, {}, headerSs);
267 }
268
269 if (!method.HasImplementation()) {
270 headerSs << "\n\n";
271 os << headerSs.str();
272 return;
273 }
274
275 headerSs << " {";
276
277 const MethodInfo *methodInfo = nullptr;
278 auto &methodsInfo = method.IsStatic() ? progInfo_.methodsStaticInfo : progInfo_.methodsInstanceInfo;
279 auto methodInfoIt = methodsInfo.find(signature);
280 bool printMethodInfo = printInformation && methodInfoIt != methodsInfo.end();
281 size_t width = SerializeIfPrintMethodInfo(method, printMethodInfo, headerSs, methodInfo, methodInfoIt);
282
283 auto headerSsStr = headerSs.str();
284 size_t lineNumber = static_cast<size_t>(std::count(headerSsStr.begin(), headerSsStr.end(), '\n')) + 1;
285
286 os << headerSsStr;
287
288 for (size_t i = 0; i < method.ins.size(); i++) {
289 std::ostringstream insSs;
290
291 std::string ins = method.ins[i].ToString("", method.GetParamsNum() != 0, method.regsNum);
292 if (method.ins[i].setLabel) {
293 insSs << ins.substr(0, ins.find(": ")) << ":\n";
294 ins.erase(0, ins.find(": ") + std::string(": ").length());
295 }
296
297 insSs << "\t";
298 if (printMethodInfo) {
299 insSs << std::setw(width) << std::left;
300 }
301 insSs << ins;
302 if (printMethodInfo) {
303 ASSERT(methodInfo != nullptr);
304 insSs << " # " << methodInfo->instructionsInfo[i];
305 }
306 insSs << "\n";
307
308 auto insSsStr = insSs.str();
309 lineNumber += static_cast<size_t>(std::count(insSsStr.begin(), insSsStr.end(), '\n'));
310
311 if (lineTable != nullptr) {
312 lineTable->emplace_back(
313 panda_file::LineTableEntry {static_cast<uint32_t>(method.ins[i].insDebug.boundLeft), lineNumber - 1});
314 }
315
316 os << insSsStr;
317 }
318
319 SerializeCheckEnd(method, os, printMethodInfo, methodInfo);
320 }
321
IsSystemType(const std::string & typeName)322 inline bool Disassembler::IsSystemType(const std::string &typeName)
323 {
324 bool isArrayType = typeName.back() == ']';
325 bool isGlobal = typeName == "_GLOBAL";
326
327 return isArrayType || isGlobal;
328 }
329
GetRecord(pandasm::Record & record,const panda_file::File::EntityId & recordId)330 void Disassembler::GetRecord(pandasm::Record &record, const panda_file::File::EntityId &recordId)
331 {
332 LOG(DEBUG, DISASSEMBLER) << "\n[getting record]\nid: " << recordId << " (0x" << std::hex << recordId << ")";
333
334 record.name = GetFullRecordName(recordId);
335
336 LOG(DEBUG, DISASSEMBLER) << "name: " << record.name;
337
338 GetMetaData(&record, recordId);
339
340 if (!file_->IsExternal(recordId)) {
341 GetMethods(recordId);
342 GetFields(record, recordId);
343 }
344 }
345
AddMethodToTables(const panda_file::File::EntityId & methodId)346 void Disassembler::AddMethodToTables(const panda_file::File::EntityId &methodId)
347 {
348 panda_file::MethodDataAccessor methodAccessor(*file_, methodId);
349 pandasm::Function newMethod("", fileLanguage_);
350 GetMethod(&newMethod, methodId);
351
352 const auto signature = pandasm::GetFunctionSignatureFromName(newMethod.name, newMethod.params);
353 auto isStatic = methodAccessor.IsStatic();
354 auto &functionTable = isStatic ? prog_.functionStaticTable : prog_.functionInstanceTable;
355 if (functionTable.find(signature) != functionTable.end()) {
356 return;
357 }
358
359 if (isStatic) {
360 methodStaticNameToId_.emplace(signature, methodId);
361 } else {
362 methodInstanceNameToId_.emplace(signature, methodId);
363 }
364
365 prog_.functionSynonyms[newMethod.name].push_back(signature);
366 functionTable.emplace(signature, std::move(newMethod));
367 }
368
GetMethod(pandasm::Function * method,const panda_file::File::EntityId & methodId)369 void Disassembler::GetMethod(pandasm::Function *method, const panda_file::File::EntityId &methodId)
370 {
371 LOG(DEBUG, DISASSEMBLER) << "\n[getting method]\nid: " << methodId << " (0x" << std::hex << methodId << ")";
372
373 if (method == nullptr) {
374 LOG(ERROR, DISASSEMBLER) << "> nullptr recieved, but method ptr expected!";
375
376 return;
377 }
378
379 panda_file::MethodDataAccessor methodAccessor(*file_, methodId);
380
381 method->name = GetFullMethodName(methodId);
382
383 LOG(DEBUG, DISASSEMBLER) << "name: " << method->name;
384
385 GetParams(method, methodAccessor.GetProtoId());
386 GetMetaData(method, methodId);
387
388 if (!method->HasImplementation()) {
389 return;
390 }
391
392 if (methodAccessor.GetCodeId().has_value()) {
393 const IdList idList = GetInstructions(method, methodId, methodAccessor.GetCodeId().value());
394
395 for (const auto &id : idList) {
396 AddMethodToTables(id);
397 }
398 } else {
399 LOG(ERROR, DISASSEMBLER) << "> error encountered at " << methodId << " (0x" << std::hex << methodId
400 << "). implementation of method expected, but no \'CODE\' tag was found!";
401
402 return;
403 }
404 }
405
406 template <typename T>
FillLiteralArrayData(pandasm::LiteralArray * litArray,const panda_file::LiteralTag & tag,const panda_file::LiteralDataAccessor::LiteralValue & value) const407 void Disassembler::FillLiteralArrayData(pandasm::LiteralArray *litArray, const panda_file::LiteralTag &tag,
408 const panda_file::LiteralDataAccessor::LiteralValue &value) const
409 {
410 panda_file::File::EntityId id(std::get<uint32_t>(value));
411 auto sp = file_->GetSpanFromId(id);
412 auto len = panda_file::helpers::Read<sizeof(uint32_t)>(&sp);
413 if (tag != panda_file::LiteralTag::ARRAY_STRING) {
414 for (size_t i = 0; i < len; i++) {
415 pandasm::LiteralArray::Literal lit;
416 lit.tag = tag;
417 lit.value = bit_cast<T>(panda_file::helpers::Read<sizeof(T)>(&sp));
418 litArray->literals.push_back(lit);
419 }
420 } else {
421 for (size_t i = 0; i < len; i++) {
422 auto strId = panda_file::helpers::Read<sizeof(T)>(&sp);
423 pandasm::LiteralArray::Literal lit;
424 lit.tag = tag;
425 lit.value = StringDataToString(file_->GetStringData(panda_file::File::EntityId(strId)));
426 litArray->literals.push_back(lit);
427 }
428 }
429 }
430
FillLiteralData(pandasm::LiteralArray * litArray,const panda_file::LiteralDataAccessor::LiteralValue & value,const panda_file::LiteralTag & tag) const431 void Disassembler::FillLiteralData(pandasm::LiteralArray *litArray,
432 const panda_file::LiteralDataAccessor::LiteralValue &value,
433 const panda_file::LiteralTag &tag) const
434 {
435 pandasm::LiteralArray::Literal lit;
436 if (tag == panda_file::LiteralTag::TAGVALUE) {
437 return;
438 }
439 lit.tag = tag;
440 lit.value = ParseLiteralValue(value, tag);
441 litArray->literals.push_back(lit);
442 }
443
ParseLiteralValue(const panda_file::LiteralDataAccessor::LiteralValue & value,const panda_file::LiteralTag & tag) const444 std::variant<bool, uint8_t, uint16_t, uint32_t, uint64_t, float, double, std::string> Disassembler::ParseLiteralValue(
445 const panda_file::LiteralDataAccessor::LiteralValue &value, const panda_file::LiteralTag &tag) const
446 {
447 switch (tag) {
448 case panda_file::LiteralTag::BOOL:
449 return std::get<bool>(value);
450 case panda_file::LiteralTag::ACCESSOR:
451 case panda_file::LiteralTag::NULLVALUE:
452 return std::get<uint8_t>(value);
453 case panda_file::LiteralTag::METHODAFFILIATE:
454 return std::get<uint16_t>(value);
455 case panda_file::LiteralTag::INTEGER:
456 return std::get<uint32_t>(value);
457 case panda_file::LiteralTag::BIGINT:
458 return std::get<uint64_t>(value);
459 case panda_file::LiteralTag::FLOAT:
460 return std::get<float>(value);
461 case panda_file::LiteralTag::DOUBLE:
462 return std::get<double>(value);
463 case panda_file::LiteralTag::STRING:
464 case panda_file::LiteralTag::METHOD:
465 case panda_file::LiteralTag::GENERATORMETHOD:
466 return ParseStringData(value);
467 case panda_file::LiteralTag::LITERALARRAY:
468 return ParseLiteralArrayData(value);
469 default:
470 LOG(ERROR, DISASSEMBLER) << "Unsupported literal with tag 0x" << std::hex << static_cast<uint32_t>(tag);
471 UNREACHABLE();
472 }
473 }
474
ParseStringData(const panda_file::LiteralDataAccessor::LiteralValue & value) const475 std::string Disassembler::ParseStringData(const panda_file::LiteralDataAccessor::LiteralValue &value) const
476 {
477 auto strData = file_->GetStringData(panda_file::File::EntityId(std::get<uint32_t>(value)));
478 return StringDataToString(strData);
479 }
480
ParseLiteralArrayData(const panda_file::LiteralDataAccessor::LiteralValue & value) const481 std::string Disassembler::ParseLiteralArrayData(const panda_file::LiteralDataAccessor::LiteralValue &value) const
482 {
483 std::stringstream ss;
484 ss << "0x" << std::hex << std::get<uint32_t>(value);
485 return ss.str();
486 }
487
GetLiteralArrayByOffset(pandasm::LiteralArray * litArray,panda_file::File::EntityId offset) const488 void Disassembler::GetLiteralArrayByOffset(pandasm::LiteralArray *litArray, panda_file::File::EntityId offset) const
489 {
490 panda_file::LiteralDataAccessor litArrayAccessor(*file_, file_->GetLiteralArraysId());
491 auto processLiteralValue = [this, litArray](const panda_file::LiteralDataAccessor::LiteralValue &value,
492 const panda_file::LiteralTag &tag) {
493 switch (tag) {
494 case panda_file::LiteralTag::ARRAY_U1: {
495 FillLiteralArrayData<bool>(litArray, tag, value);
496 break;
497 }
498 case panda_file::LiteralTag::ARRAY_I8:
499 case panda_file::LiteralTag::ARRAY_U8: {
500 FillLiteralArrayData<uint8_t>(litArray, tag, value);
501 break;
502 }
503 case panda_file::LiteralTag::ARRAY_I16:
504 case panda_file::LiteralTag::ARRAY_U16: {
505 FillLiteralArrayData<uint16_t>(litArray, tag, value);
506 break;
507 }
508 case panda_file::LiteralTag::ARRAY_I32:
509 case panda_file::LiteralTag::ARRAY_U32: {
510 FillLiteralArrayData<uint32_t>(litArray, tag, value);
511 break;
512 }
513 case panda_file::LiteralTag::ARRAY_I64:
514 case panda_file::LiteralTag::ARRAY_U64: {
515 FillLiteralArrayData<uint64_t>(litArray, tag, value);
516 break;
517 }
518 case panda_file::LiteralTag::ARRAY_F32: {
519 FillLiteralArrayData<float>(litArray, tag, value);
520 break;
521 }
522 case panda_file::LiteralTag::ARRAY_F64: {
523 FillLiteralArrayData<double>(litArray, tag, value);
524 break;
525 }
526 case panda_file::LiteralTag::ARRAY_STRING: {
527 FillLiteralArrayData<uint32_t>(litArray, tag, value);
528 break;
529 }
530 default: {
531 FillLiteralData(litArray, value, tag);
532 break;
533 }
534 }
535 };
536
537 litArrayAccessor.EnumerateLiteralVals(offset, processLiteralValue);
538 }
539
GetLiteralArray(pandasm::LiteralArray * litArray,const size_t index)540 void Disassembler::GetLiteralArray(pandasm::LiteralArray *litArray, const size_t index)
541 {
542 LOG(DEBUG, DISASSEMBLER) << "\n[getting literal array]\nindex: " << index;
543
544 panda_file::LiteralDataAccessor litArrayAccessor(*file_, file_->GetLiteralArraysId());
545 GetLiteralArrayByOffset(litArray, litArrayAccessor.GetLiteralArrayId(index));
546 }
547
GetLiteralArrays()548 void Disassembler::GetLiteralArrays()
549 {
550 const auto litArraysId = file_->GetLiteralArraysId();
551
552 LOG(DEBUG, DISASSEMBLER) << "\n[getting literal arrays]\nid: " << litArraysId << " (0x" << std::hex << litArraysId
553 << ")";
554
555 panda_file::LiteralDataAccessor litArrayAccessor(*file_, litArraysId);
556 size_t numLitarrays = litArrayAccessor.GetLiteralNum();
557 for (size_t index = 0; index < numLitarrays; index++) {
558 ark::pandasm::LiteralArray litAr;
559 GetLiteralArray(&litAr, index);
560 prog_.literalarrayTable.emplace(std::to_string(index), litAr);
561 }
562 }
563
GetRecords()564 void Disassembler::GetRecords()
565 {
566 LOG(DEBUG, DISASSEMBLER) << "\n[getting records]\n";
567
568 const auto classIdx = file_->GetClasses();
569
570 for (size_t i = 0; i < classIdx.size(); i++) {
571 uint32_t classId = classIdx[i];
572 auto classOff = file_->GetHeader()->classIdxOff + sizeof(uint32_t) * i;
573
574 if (classId > file_->GetHeader()->fileSize) {
575 LOG(ERROR, DISASSEMBLER) << "> error encountered in record at " << classOff << " (0x" << std::hex
576 << classOff << "). binary file corrupted. record offset (0x" << classId
577 << ") out of bounds (0x" << file_->GetHeader()->fileSize << ")!";
578 break;
579 }
580
581 const panda_file::File::EntityId recordId {classId};
582 auto language = GetRecordLanguage(recordId);
583 if (language != fileLanguage_) {
584 if (fileLanguage_ == panda_file::SourceLang::PANDA_ASSEMBLY) {
585 fileLanguage_ = language;
586 } else if (language != panda_file::SourceLang::PANDA_ASSEMBLY) {
587 LOG(ERROR, DISASSEMBLER) << "> possible error encountered in record at" << classOff << " (0x"
588 << std::hex << classOff << "). record's language ("
589 << panda_file::LanguageToString(language)
590 << ") differs from file's language ("
591 << panda_file::LanguageToString(fileLanguage_) << ")!";
592 }
593 }
594
595 pandasm::Record record("", fileLanguage_);
596 GetRecord(record, recordId);
597
598 if (prog_.recordTable.find(record.name) == prog_.recordTable.end()) {
599 recordNameToId_.emplace(record.name, recordId);
600 prog_.recordTable.emplace(record.name, std::move(record));
601 }
602 }
603 }
604
GetField(pandasm::Field & field,const panda_file::FieldDataAccessor & fieldAccessor)605 void Disassembler::GetField(pandasm::Field &field, const panda_file::FieldDataAccessor &fieldAccessor)
606 {
607 panda_file::File::EntityId fieldNameId = fieldAccessor.GetNameId();
608 field.name = StringDataToString(file_->GetStringData(fieldNameId));
609
610 uint32_t fieldType = fieldAccessor.GetType();
611 field.type = FieldTypeToPandasmType(fieldType);
612
613 GetMetaData(&field, fieldAccessor.GetFieldId());
614 }
615
GetFields(pandasm::Record & record,const panda_file::File::EntityId & recordId)616 void Disassembler::GetFields(pandasm::Record &record, const panda_file::File::EntityId &recordId)
617 {
618 panda_file::ClassDataAccessor classAccessor {*file_, recordId};
619
620 classAccessor.EnumerateFields([&](panda_file::FieldDataAccessor &fieldAccessor) -> void {
621 pandasm::Field field(fileLanguage_);
622
623 GetField(field, fieldAccessor);
624
625 record.fieldList.push_back(std::move(field));
626 });
627 }
628
AddExternalFieldsToRecords()629 void Disassembler::AddExternalFieldsToRecords()
630 {
631 for (auto &[recordName, record] : prog_.recordTable) {
632 auto iter = externalFieldTable_.find(recordName);
633 if (iter == externalFieldTable_.end() || iter->second.empty()) {
634 continue;
635 }
636 for (auto &fieldIter : iter->second) {
637 record.fieldList.push_back(std::move(fieldIter));
638 }
639 externalFieldTable_.erase(recordName);
640 }
641 }
642
AddExternalFieldsInfoToRecords()643 void Disassembler::AddExternalFieldsInfoToRecords()
644 {
645 for (auto &[recordName, recordInfo] : progInfo_.recordsInfo) {
646 auto iter = externalFieldsInfoTable_.find(recordName);
647 if (iter == externalFieldsInfoTable_.end() || iter->second.empty()) {
648 continue;
649 }
650 for (auto &info : iter->second) {
651 recordInfo.fieldsInfo.push_back(std::move(info));
652 }
653 externalFieldsInfoTable_.erase(recordName);
654 }
655 }
656
GetMethods(const panda_file::File::EntityId & recordId)657 void Disassembler::GetMethods(const panda_file::File::EntityId &recordId)
658 {
659 panda_file::ClassDataAccessor classAccessor {*file_, recordId};
660
661 classAccessor.EnumerateMethods([&](panda_file::MethodDataAccessor &methodAccessor) -> void {
662 AddMethodToTables(methodAccessor.GetMethodId());
663 });
664 }
665
GetParams(pandasm::Function * method,const panda_file::File::EntityId & protoId) const666 void Disassembler::GetParams(pandasm::Function *method, const panda_file::File::EntityId &protoId) const
667 {
668 /// frame size - 2^16 - 1
669 static const uint32_t MAX_ARG_NUM = 0xFFFF;
670
671 LOG(DEBUG, DISASSEMBLER) << "[getting params]\nproto id: " << protoId << " (0x" << std::hex << protoId << ")";
672
673 if (method == nullptr) {
674 LOG(ERROR, DISASSEMBLER) << "> nullptr recieved, but method ptr expected!";
675
676 return;
677 }
678
679 panda_file::ProtoDataAccessor protoAccessor(*file_, protoId);
680
681 if (protoAccessor.GetNumArgs() > MAX_ARG_NUM) {
682 LOG(ERROR, DISASSEMBLER) << "> error encountered at " << protoId << " (0x" << std::hex << protoId
683 << "). number of function's arguments (" << std::dec << protoAccessor.GetNumArgs()
684 << ") exceeds MAX_ARG_NUM (" << MAX_ARG_NUM << ") !";
685
686 return;
687 }
688
689 size_t refIdx = 0;
690 method->returnType = PFTypeToPandasmType(protoAccessor.GetReturnType(), protoAccessor, refIdx);
691
692 for (size_t i = 0; i < protoAccessor.GetNumArgs(); i++) {
693 auto argType = PFTypeToPandasmType(protoAccessor.GetArgType(i), protoAccessor, refIdx);
694 method->params.emplace_back(argType, fileLanguage_);
695 }
696 }
697
GetExceptions(pandasm::Function * method,panda_file::File::EntityId methodId,panda_file::File::EntityId codeId) const698 LabelTable Disassembler::GetExceptions(pandasm::Function *method, panda_file::File::EntityId methodId,
699 panda_file::File::EntityId codeId) const
700 {
701 LOG(DEBUG, DISASSEMBLER) << "[getting exceptions]\ncode id: " << codeId << " (0x" << std::hex << codeId << ")";
702
703 if (method == nullptr) {
704 LOG(ERROR, DISASSEMBLER) << "> nullptr recieved, but method ptr expected!\n";
705 return LabelTable {};
706 }
707
708 panda_file::CodeDataAccessor codeAccessor(*file_, codeId);
709
710 const auto bcIns = BytecodeInstruction(codeAccessor.GetInstructions());
711 const auto bcInsLast = bcIns.JumpTo(codeAccessor.GetCodeSize());
712
713 size_t tryIdx = 0;
714 LabelTable labelTable {};
715 codeAccessor.EnumerateTryBlocks([&](panda_file::CodeDataAccessor::TryBlock &tryBlock) {
716 pandasm::Function::CatchBlock catchBlockPa {};
717 if (!LocateTryBlock(bcIns, bcInsLast, tryBlock, &catchBlockPa, &labelTable, tryIdx)) {
718 return false;
719 }
720 size_t catchIdx = 0;
721 tryBlock.EnumerateCatchBlocks([&](panda_file::CodeDataAccessor::CatchBlock &catchBlock) {
722 auto classIdx = catchBlock.GetTypeIdx();
723 if (classIdx == panda_file::INVALID_INDEX) {
724 catchBlockPa.exceptionRecord = "";
725 } else {
726 const auto classId = file_->ResolveClassIndex(methodId, classIdx);
727 catchBlockPa.exceptionRecord = GetFullRecordName(classId);
728 }
729 if (!LocateCatchBlock(bcIns, bcInsLast, catchBlock, &catchBlockPa, &labelTable, tryIdx, catchIdx)) {
730 return false;
731 }
732
733 method->catchBlocks.push_back(catchBlockPa);
734 catchBlockPa.catchBeginLabel = "";
735 catchBlockPa.catchEndLabel = "";
736 catchIdx++;
737
738 return true;
739 });
740 tryIdx++;
741
742 return true;
743 });
744
745 return labelTable;
746 }
747
GetBytecodeInstructionNumber(BytecodeInstruction bcInsFirst,BytecodeInstruction bcInsCur)748 static size_t GetBytecodeInstructionNumber(BytecodeInstruction bcInsFirst, BytecodeInstruction bcInsCur)
749 {
750 size_t count = 0;
751
752 while (bcInsFirst.GetAddress() != bcInsCur.GetAddress()) {
753 count++;
754 bcInsFirst = bcInsFirst.GetNext();
755 if (bcInsFirst.GetAddress() > bcInsCur.GetAddress()) {
756 return std::numeric_limits<size_t>::max();
757 }
758 }
759
760 return count;
761 }
762
763 // CC-OFFNXT(G.FUN.01) solid logic
LocateTryBlock(const BytecodeInstruction & bcIns,const BytecodeInstruction & bcInsLast,const panda_file::CodeDataAccessor::TryBlock & tryBlock,pandasm::Function::CatchBlock * catchBlockPa,LabelTable * labelTable,size_t tryIdx) const764 bool Disassembler::LocateTryBlock(const BytecodeInstruction &bcIns, const BytecodeInstruction &bcInsLast,
765 const panda_file::CodeDataAccessor::TryBlock &tryBlock,
766 pandasm::Function::CatchBlock *catchBlockPa, LabelTable *labelTable,
767 size_t tryIdx) const
768 {
769 const auto tryBeginBcIns = bcIns.JumpTo(tryBlock.GetStartPc());
770 const auto tryEndBcIns = bcIns.JumpTo(tryBlock.GetStartPc() + tryBlock.GetLength());
771
772 const size_t tryBeginIdx = GetBytecodeInstructionNumber(bcIns, tryBeginBcIns);
773 const size_t tryEndIdx = GetBytecodeInstructionNumber(bcIns, tryEndBcIns);
774
775 const bool tryBeginOffsetInRange = bcInsLast.GetAddress() > tryBeginBcIns.GetAddress();
776 const bool tryEndOffsetInRange = bcInsLast.GetAddress() >= tryEndBcIns.GetAddress();
777 const bool tryBeginOffsetValid = tryBeginIdx != std::numeric_limits<size_t>::max();
778 const bool tryEndOffsetValid = tryEndIdx != std::numeric_limits<size_t>::max();
779
780 if (!tryBeginOffsetInRange || !tryBeginOffsetValid) {
781 LOG(ERROR, DISASSEMBLER) << "> invalid try block begin offset! address is: 0x" << std::hex
782 << tryBeginBcIns.GetAddress();
783 return false;
784 }
785
786 auto itBegin = labelTable->find(tryBeginIdx);
787 if (itBegin == labelTable->end()) {
788 std::stringstream ss {};
789 ss << "try_begin_label_" << tryIdx;
790 catchBlockPa->tryBeginLabel = ss.str();
791 labelTable->insert(std::pair<size_t, std::string>(tryBeginIdx, ss.str()));
792 } else {
793 catchBlockPa->tryBeginLabel = itBegin->second;
794 }
795
796 if (!tryEndOffsetInRange || !tryEndOffsetValid) {
797 LOG(ERROR, DISASSEMBLER) << "> invalid try block end offset! address is: 0x" << std::hex
798 << tryEndBcIns.GetAddress();
799 return false;
800 }
801
802 auto itEnd = labelTable->find(tryEndIdx);
803 if (itEnd == labelTable->end()) {
804 std::stringstream ss {};
805 ss << "try_end_label_" << tryIdx;
806 catchBlockPa->tryEndLabel = ss.str();
807 labelTable->insert(std::pair<size_t, std::string>(tryEndIdx, ss.str()));
808 } else {
809 catchBlockPa->tryEndLabel = itEnd->second;
810 }
811
812 return true;
813 }
814
LocateCatchBlock(const BytecodeInstruction & bcIns,const BytecodeInstruction & bcInsLast,const panda_file::CodeDataAccessor::CatchBlock & catchBlock,pandasm::Function::CatchBlock * catchBlockPa,LabelTable * labelTable,size_t tryIdx,size_t catchIdx) const815 bool Disassembler::LocateCatchBlock(const BytecodeInstruction &bcIns, const BytecodeInstruction &bcInsLast,
816 const panda_file::CodeDataAccessor::CatchBlock &catchBlock,
817 pandasm::Function::CatchBlock *catchBlockPa, LabelTable *labelTable, size_t tryIdx,
818 size_t catchIdx) const
819 {
820 const auto handlerBeginOffset = catchBlock.GetHandlerPc();
821 const auto handlerEndOffset = handlerBeginOffset + catchBlock.GetCodeSize();
822
823 const auto handlerBeginBcIns = bcIns.JumpTo(handlerBeginOffset);
824 const auto handlerEndBcIns = bcIns.JumpTo(handlerEndOffset);
825
826 const size_t handlerBeginIdx = GetBytecodeInstructionNumber(bcIns, handlerBeginBcIns);
827 const size_t handlerEndIdx = GetBytecodeInstructionNumber(bcIns, handlerEndBcIns);
828
829 const bool handlerBeginOffsetInRange = bcInsLast.GetAddress() > handlerBeginBcIns.GetAddress();
830 const bool handlerEndOffsetInRange = bcInsLast.GetAddress() > handlerEndBcIns.GetAddress();
831 const bool handlerEndPresent = catchBlock.GetCodeSize() != 0;
832 const bool handlerBeginOffsetValid = handlerBeginIdx != std::numeric_limits<size_t>::max();
833 const bool handlerEndOffsetValid = handlerEndIdx != std::numeric_limits<size_t>::max();
834
835 if (!handlerBeginOffsetInRange || !handlerBeginOffsetValid) {
836 LOG(ERROR, DISASSEMBLER) << "> invalid catch block begin offset! address is: 0x" << std::hex
837 << handlerBeginBcIns.GetAddress();
838 return false;
839 }
840
841 auto itBegin = labelTable->find(handlerBeginIdx);
842 if (itBegin == labelTable->end()) {
843 std::stringstream ss {};
844 ss << "handler_begin_label_" << tryIdx << "_" << catchIdx;
845 catchBlockPa->catchBeginLabel = ss.str();
846 labelTable->insert(std::pair<size_t, std::string>(handlerBeginIdx, ss.str()));
847 } else {
848 catchBlockPa->catchBeginLabel = itBegin->second;
849 }
850
851 if (!handlerEndOffsetInRange || !handlerEndOffsetValid) {
852 LOG(ERROR, DISASSEMBLER) << "> invalid catch block end offset! address is: 0x" << std::hex
853 << handlerEndBcIns.GetAddress();
854 return false;
855 }
856
857 if (handlerEndPresent) {
858 auto itEnd = labelTable->find(handlerEndIdx);
859 if (itEnd == labelTable->end()) {
860 std::stringstream ss {};
861 ss << "handler_end_label_" << tryIdx << "_" << catchIdx;
862 catchBlockPa->catchEndLabel = ss.str();
863 labelTable->insert(std::pair<size_t, std::string>(handlerEndIdx, ss.str()));
864 } else {
865 catchBlockPa->catchEndLabel = itEnd->second;
866 }
867 }
868
869 return true;
870 }
871
872 template <typename T>
SetEntityAttribute(T * entity,const std::function<bool ()> & shouldSet,std::string_view attribute)873 static void SetEntityAttribute(T *entity, const std::function<bool()> &shouldSet, std::string_view attribute)
874 {
875 if (shouldSet()) {
876 auto err = entity->metadata->SetAttribute(attribute);
877 if (err.has_value()) {
878 LOG(ERROR, DISASSEMBLER) << err.value().GetMessage();
879 }
880 }
881 }
882
883 template <typename T>
SetEntityAttributeValue(T * entity,const std::function<bool ()> & shouldSet,std::string_view attribute,const char * value)884 static void SetEntityAttributeValue(T *entity, const std::function<bool()> &shouldSet, std::string_view attribute,
885 const char *value)
886 {
887 if (shouldSet()) {
888 auto err = entity->metadata->SetAttributeValue(attribute, value);
889 if (err.has_value()) {
890 LOG(ERROR, DISASSEMBLER) << err.value().GetMessage();
891 }
892 }
893 }
894
GetMetaData(pandasm::Function * method,const panda_file::File::EntityId & methodId) const895 void Disassembler::GetMetaData(pandasm::Function *method, const panda_file::File::EntityId &methodId) const
896 {
897 LOG(DEBUG, DISASSEMBLER) << "[getting metadata]\nmethod id: " << methodId << " (0x" << std::hex << methodId << ")";
898
899 if (method == nullptr) {
900 LOG(ERROR, DISASSEMBLER) << "> nullptr recieved, but method ptr expected!";
901
902 return;
903 }
904
905 panda_file::MethodDataAccessor methodAccessor(*file_, methodId);
906
907 const auto methodNameRaw = StringDataToString(file_->GetStringData(methodAccessor.GetNameId()));
908
909 if (!methodAccessor.IsStatic()) {
910 const auto className = StringDataToString(file_->GetStringData(methodAccessor.GetClassId()));
911 auto thisType = pandasm::Type::FromDescriptor(className);
912
913 LOG(DEBUG, DISASSEMBLER) << "method (raw: \'" << methodNameRaw
914 << "\') is not static. emplacing self-argument of type " << thisType.GetName();
915
916 method->params.insert(method->params.begin(), pandasm::Function::Parameter(thisType, fileLanguage_));
917 }
918 SetEntityAttribute(
919 method, [&methodAccessor]() { return methodAccessor.IsStatic(); }, "static");
920
921 SetEntityAttribute(
922 method, [this, &methodAccessor]() { return file_->IsExternal(methodAccessor.GetMethodId()); }, "external");
923
924 SetEntityAttribute(
925 method, [&methodAccessor]() { return methodAccessor.IsNative(); }, "native");
926
927 SetEntityAttribute(
928 method, [&methodAccessor]() { return methodAccessor.IsAbstract(); }, "noimpl");
929
930 SetEntityAttribute(
931 method, [&methodAccessor]() { return methodAccessor.IsVarArgs(); }, "varargs");
932
933 SetEntityAttributeValue(
934 method, [&methodAccessor]() { return methodAccessor.IsPublic(); }, "access.function", "public");
935
936 SetEntityAttributeValue(
937 method, [&methodAccessor]() { return methodAccessor.IsProtected(); }, "access.function", "protected");
938
939 SetEntityAttributeValue(
940 method, [&methodAccessor]() { return methodAccessor.IsPrivate(); }, "access.function", "private");
941
942 SetEntityAttribute(
943 method, [&methodAccessor]() { return methodAccessor.IsFinal(); }, "final");
944
945 std::string ctorName = ark::panda_file::GetCtorName(fileLanguage_);
946 std::string cctorName = ark::panda_file::GetCctorName(fileLanguage_);
947
948 const bool isCtor = (methodNameRaw == ctorName);
949 const bool isCctor = (methodNameRaw == cctorName);
950
951 if (isCtor) {
952 method->metadata->SetAttribute("ctor");
953 method->name.replace(method->name.find(ctorName), ctorName.length(), "_ctor_");
954 } else if (isCctor) {
955 method->metadata->SetAttribute("cctor");
956 method->name.replace(method->name.find(cctorName), cctorName.length(), "_cctor_");
957 }
958 }
959
GetMetaData(pandasm::Record * record,const panda_file::File::EntityId & recordId) const960 void Disassembler::GetMetaData(pandasm::Record *record, const panda_file::File::EntityId &recordId) const
961 {
962 LOG(DEBUG, DISASSEMBLER) << "[getting metadata]\nrecord id: " << recordId << " (0x" << std::hex << recordId << ")";
963
964 if (record == nullptr) {
965 LOG(ERROR, DISASSEMBLER) << "> nullptr recieved, but record ptr expected!";
966
967 return;
968 }
969
970 SetEntityAttribute(
971 record, [this, recordId]() { return file_->IsExternal(recordId); }, "external");
972
973 auto external = file_->IsExternal(recordId);
974 if (!external) {
975 auto cda = panda_file::ClassDataAccessor {*file_, recordId};
976 SetEntityAttributeValue(
977 record, [&cda]() { return cda.IsPublic(); }, "access.record", "public");
978
979 SetEntityAttributeValue(
980 record, [&cda]() { return cda.IsProtected(); }, "access.record", "protected");
981
982 SetEntityAttributeValue(
983 record, [&cda]() { return cda.IsPrivate(); }, "access.record", "private");
984
985 SetEntityAttribute(
986 record, [&cda]() { return cda.IsFinal(); }, "final");
987 }
988 }
989
990 template <typename T, pandasm::Value::Type VALUE_TYPE>
SetMetadata(panda_file::FieldDataAccessor & accessor,pandasm::Field * field) const991 void Disassembler::SetMetadata(panda_file::FieldDataAccessor &accessor, pandasm::Field *field) const
992 {
993 std::optional<T> val = accessor.GetValue<T>();
994 if (val.has_value()) {
995 field->metadata->SetValue(pandasm::ScalarValue::Create<VALUE_TYPE>(val.value()));
996 }
997 }
998
GetMetadataFieldValue(panda_file::FieldDataAccessor & fieldAccessor,pandasm::Field * field) const999 void Disassembler::GetMetadataFieldValue(panda_file::FieldDataAccessor &fieldAccessor, pandasm::Field *field) const
1000 {
1001 static const std::unordered_map<panda_file::Type::TypeId,
1002 std::function<void(panda_file::FieldDataAccessor &, pandasm::Field *)>>
1003 HANDLERS = {
1004 {panda_file::Type::TypeId::U1,
1005 [this](auto &accessor, auto *f) { SetMetadata<bool, pandasm::Value::Type::U1>(accessor, f); }},
1006 {panda_file::Type::TypeId::U8,
1007 [this](auto &accessor, auto *f) { SetMetadata<uint8_t, pandasm::Value::Type::U8>(accessor, f); }},
1008 {panda_file::Type::TypeId::U16,
1009 [this](auto &accessor, auto *f) { SetMetadata<uint16_t, pandasm::Value::Type::U16>(accessor, f); }},
1010 {panda_file::Type::TypeId::U32,
1011 [this](auto &accessor, auto *f) { SetMetadata<uint32_t, pandasm::Value::Type::U32>(accessor, f); }},
1012 {panda_file::Type::TypeId::F64,
1013 [this](auto &accessor, auto *f) { SetMetadata<double, pandasm::Value::Type::F64>(accessor, f); }},
1014 {panda_file::Type::TypeId::I8,
1015 [this](auto &accessor, auto *f) { SetMetadata<int8_t, pandasm::Value::Type::I8>(accessor, f); }},
1016 {panda_file::Type::TypeId::I16,
1017 [this](auto &accessor, auto *f) { SetMetadata<int16_t, pandasm::Value::Type::I16>(accessor, f); }},
1018 {panda_file::Type::TypeId::I32,
1019 [this](auto &accessor, auto *f) { SetMetadata<int32_t, pandasm::Value::Type::I32>(accessor, f); }},
1020 {panda_file::Type::TypeId::I64,
1021 [this](auto &accessor, auto *f) { SetMetadata<int64_t, pandasm::Value::Type::I64>(accessor, f); }},
1022 };
1023
1024 auto it = HANDLERS.find(field->type.GetId());
1025 if (it != HANDLERS.end()) {
1026 it->second(fieldAccessor, field);
1027 } else if (field->type.GetId() == panda_file::Type::TypeId::REFERENCE &&
1028 field->type.GetName() == "std/core/String") {
1029 std::optional<uint32_t> stringOffsetVal = fieldAccessor.GetValue<uint32_t>();
1030 if (stringOffsetVal.has_value()) {
1031 std::string_view val {reinterpret_cast<const char *>(
1032 file_->GetStringData(panda_file::File::EntityId(stringOffsetVal.value())).data)};
1033 field->metadata->SetValue(pandasm::ScalarValue::Create<pandasm::Value::Type::STRING>(val));
1034 }
1035 } else if (field->type.GetRank() > 0) {
1036 std::optional<uint32_t> litarrayOffsetVal = fieldAccessor.GetValue<uint32_t>();
1037 if (litarrayOffsetVal.has_value()) {
1038 field->metadata->SetValue(pandasm::ScalarValue::Create<pandasm::Value::Type::LITERALARRAY>(
1039 std::string_view {std::to_string(litarrayOffsetVal.value())}));
1040 }
1041 }
1042 }
1043
GetMetaData(pandasm::Field * field,const panda_file::File::EntityId & fieldId) const1044 void Disassembler::GetMetaData(pandasm::Field *field, const panda_file::File::EntityId &fieldId) const
1045 {
1046 LOG(DEBUG, DISASSEMBLER) << "[getting metadata]\nfield id: " << fieldId << " (0x" << std::hex << fieldId << ")";
1047
1048 if (field == nullptr) {
1049 LOG(ERROR, DISASSEMBLER) << "> nullptr recieved, but method ptr expected!";
1050
1051 return;
1052 }
1053
1054 panda_file::FieldDataAccessor fieldAccessor(*file_, fieldId);
1055
1056 SetEntityAttribute(
1057 field, [&fieldAccessor]() { return fieldAccessor.IsExternal(); }, "external");
1058
1059 SetEntityAttribute(
1060 field, [&fieldAccessor]() { return fieldAccessor.IsStatic(); }, "static");
1061
1062 SetEntityAttributeValue(
1063 field, [&fieldAccessor]() { return fieldAccessor.IsPublic(); }, "access.field", "public");
1064
1065 SetEntityAttributeValue(
1066 field, [&fieldAccessor]() { return fieldAccessor.IsProtected(); }, "access.field", "protected");
1067
1068 SetEntityAttributeValue(
1069 field, [&fieldAccessor]() { return fieldAccessor.IsPrivate(); }, "access.field", "private");
1070
1071 SetEntityAttribute(
1072 field, [&fieldAccessor]() { return fieldAccessor.IsFinal(); }, "final");
1073 GetMetadataFieldValue(fieldAccessor, field);
1074 }
1075
AnnotationTagToString(const char tag) const1076 std::string Disassembler::AnnotationTagToString(const char tag) const
1077 {
1078 static const std::unordered_map<char, std::string> TAG_TO_STRING = {{'1', "u1"},
1079 {'2', "i8"},
1080 {'3', "u8"},
1081 {'4', "i16"},
1082 {'5', "u16"},
1083 {'6', "i32"},
1084 {'7', "u32"},
1085 {'8', "i64"},
1086 {'9', "u64"},
1087 {'A', "f32"},
1088 {'B', "f64"},
1089 {'C', "string"},
1090 {'D', "record"},
1091 {'E', "method"},
1092 {'F', "enum"},
1093 {'G', "annotation"},
1094 {'J', "method_handle"},
1095 {'H', "array"},
1096 {'K', "u1[]"},
1097 {'L', "i8[]"},
1098 {'M', "u8[]"},
1099 {'N', "i16[]"},
1100 {'O', "u16[]"},
1101 {'P', "i32[]"},
1102 {'Q', "u32[]"},
1103 {'R', "i64[]"},
1104 {'S', "u64[]"},
1105 {'T', "f32[]"},
1106 {'U', "f64[]"},
1107 {'V', "string[]"},
1108 {'W', "record[]"},
1109 {'X', "method[]"},
1110 {'Y', "enum[]"},
1111 {'Z', "annotation[]"},
1112 {'@', "method_handle[]"},
1113 {'*', "nullptr_string"}};
1114
1115 return TAG_TO_STRING.at(tag);
1116 }
1117
ScalarValueToString(const panda_file::ScalarValue & value,const std::string & type)1118 std::string Disassembler::ScalarValueToString(const panda_file::ScalarValue &value, const std::string &type)
1119 {
1120 std::stringstream ss;
1121
1122 if (type == "i8") {
1123 auto res = value.Get<int8_t>();
1124 ss << static_cast<int>(res);
1125 } else if (type == "u1" || type == "u8") {
1126 auto res = value.Get<uint8_t>();
1127 ss << static_cast<unsigned int>(res);
1128 } else if (type == "i16") {
1129 ss << value.Get<int16_t>();
1130 } else if (type == "u16") {
1131 ss << value.Get<uint16_t>();
1132 } else if (type == "i32") {
1133 ss << value.Get<int32_t>();
1134 } else if (type == "u32") {
1135 ss << value.Get<uint32_t>();
1136 } else if (type == "i64") {
1137 ss << value.Get<int64_t>();
1138 } else if (type == "u64") {
1139 ss << value.Get<uint64_t>();
1140 } else if (type == "f32") {
1141 ss << value.Get<float>();
1142 } else if (type == "f64") {
1143 ss << value.Get<double>();
1144 } else if (type == "string") {
1145 const auto id = value.Get<panda_file::File::EntityId>();
1146 ss << "\"" << StringDataToString(file_->GetStringData(id)) << "\"";
1147 } else if (type == "record") {
1148 const auto id = value.Get<panda_file::File::EntityId>();
1149 ss << GetFullRecordName(id);
1150 } else if (type == "method") {
1151 const auto id = value.Get<panda_file::File::EntityId>();
1152 AddMethodToTables(id);
1153 ss << GetMethodSignature(id);
1154 } else if (type == "enum") {
1155 const auto id = value.Get<panda_file::File::EntityId>();
1156 panda_file::FieldDataAccessor fieldAccessor(*file_, id);
1157 ss << GetFullRecordName(fieldAccessor.GetClassId()) << "."
1158 << StringDataToString(file_->GetStringData(fieldAccessor.GetNameId()));
1159 } else if (type == "annotation") {
1160 const auto id = value.Get<panda_file::File::EntityId>();
1161 ss << "id_" << id;
1162 } else if (type == "void") {
1163 return std::string();
1164 } else if (type == "method_handle") {
1165 } else if (type == "nullptr_string") {
1166 ss << static_cast<uint32_t>(0);
1167 }
1168
1169 return ss.str();
1170 }
1171
ArrayValueToString(const panda_file::ArrayValue & value,const std::string & type,const size_t idx)1172 std::string Disassembler::ArrayValueToString(const panda_file::ArrayValue &value, const std::string &type,
1173 const size_t idx)
1174 {
1175 std::stringstream ss;
1176
1177 if (type == "i8") {
1178 auto res = value.Get<int8_t>(idx);
1179 ss << static_cast<int>(res);
1180 } else if (type == "u1" || type == "u8") {
1181 auto res = value.Get<uint8_t>(idx);
1182 ss << static_cast<unsigned int>(res);
1183 } else if (type == "i16") {
1184 ss << (value.Get<int16_t>(idx));
1185 } else if (type == "u16") {
1186 ss << (value.Get<uint16_t>(idx));
1187 } else if (type == "i32") {
1188 ss << (value.Get<int32_t>(idx));
1189 } else if (type == "u32") {
1190 ss << (value.Get<uint32_t>(idx));
1191 } else if (type == "i64") {
1192 ss << (value.Get<int64_t>(idx));
1193 } else if (type == "u64") {
1194 ss << (value.Get<uint64_t>(idx));
1195 } else if (type == "f32") {
1196 ss << value.Get<float>(idx);
1197 } else if (type == "f64") {
1198 ss << value.Get<double>(idx);
1199 } else if (type == "string") {
1200 const auto id = value.Get<panda_file::File::EntityId>(idx);
1201 ss << '\"' << StringDataToString(file_->GetStringData(id)) << '\"';
1202 } else if (type == "record") {
1203 const auto id = value.Get<panda_file::File::EntityId>(idx);
1204 ss << GetFullRecordName(id);
1205 } else if (type == "method") {
1206 const auto id = value.Get<panda_file::File::EntityId>(idx);
1207 AddMethodToTables(id);
1208 ss << GetMethodSignature(id);
1209 } else if (type == "enum") {
1210 const auto id = value.Get<panda_file::File::EntityId>(idx);
1211 panda_file::FieldDataAccessor fieldAccessor(*file_, id);
1212 ss << GetFullRecordName(fieldAccessor.GetClassId()) << "."
1213 << StringDataToString(file_->GetStringData(fieldAccessor.GetNameId()));
1214 } else if (type == "annotation") {
1215 const auto id = value.Get<panda_file::File::EntityId>(idx);
1216 ss << "id_" << id;
1217 } else if (type == "method_handle") {
1218 } else if (type == "nullptr_string") {
1219 }
1220
1221 return ss.str();
1222 }
1223
GetFullMethodName(const panda_file::File::EntityId & methodId) const1224 std::string Disassembler::GetFullMethodName(const panda_file::File::EntityId &methodId) const
1225 {
1226 ark::panda_file::MethodDataAccessor methodAccessor(*file_, methodId);
1227
1228 const auto methodNameRaw = StringDataToString(file_->GetStringData(methodAccessor.GetNameId()));
1229
1230 std::string className = GetFullRecordName(methodAccessor.GetClassId());
1231 if (IsSystemType(className)) {
1232 className = "";
1233 } else {
1234 className += ".";
1235 }
1236
1237 return className + methodNameRaw;
1238 }
1239
GetMethodSignature(const panda_file::File::EntityId & methodId) const1240 std::string Disassembler::GetMethodSignature(const panda_file::File::EntityId &methodId) const
1241 {
1242 ark::panda_file::MethodDataAccessor methodAccessor(*file_, methodId);
1243
1244 pandasm::Function method(GetFullMethodName(methodId), fileLanguage_);
1245 GetParams(&method, methodAccessor.GetProtoId());
1246 GetMetaData(&method, methodId);
1247
1248 auto res = pandasm::GetFunctionSignatureFromName(method.name, method.params);
1249 return method.IsStatic() ? "<static> " + res : res;
1250 }
1251
GetFullRecordName(const panda_file::File::EntityId & classId) const1252 std::string Disassembler::GetFullRecordName(const panda_file::File::EntityId &classId) const
1253 {
1254 std::string name = StringDataToString(file_->GetStringData(classId));
1255
1256 auto type = pandasm::Type::FromDescriptor(name);
1257 type = pandasm::Type(type.GetComponentName(), type.GetRank());
1258
1259 return type.GetPandasmName();
1260 }
1261
1262 static constexpr size_t DEFAULT_OFFSET_WIDTH = 4;
1263
GetFieldInfo(const panda_file::FieldDataAccessor & fieldAccessor,std::stringstream & ss)1264 static void GetFieldInfo(const panda_file::FieldDataAccessor &fieldAccessor, std::stringstream &ss)
1265 {
1266 ss << "offset: 0x" << std::setfill('0') << std::setw(DEFAULT_OFFSET_WIDTH) << std::hex << fieldAccessor.GetFieldId()
1267 << ", type: 0x" << fieldAccessor.GetType();
1268 }
1269
GetFieldInfo(const panda_file::FieldDataAccessor & fieldAccessor)1270 static std::string GetFieldInfo(const panda_file::FieldDataAccessor &fieldAccessor)
1271 {
1272 std::stringstream ss;
1273 ss << "offset: 0x" << std::setfill('0') << std::setw(DEFAULT_OFFSET_WIDTH) << std::hex << fieldAccessor.GetFieldId()
1274 << ", type: 0x" << fieldAccessor.GetType();
1275 return ss.str();
1276 }
1277
GetRecordInfo(const panda_file::File::EntityId & recordId,RecordInfo * recordInfo) const1278 void Disassembler::GetRecordInfo(const panda_file::File::EntityId &recordId, RecordInfo *recordInfo) const
1279 {
1280 if (file_->IsExternal(recordId)) {
1281 return;
1282 }
1283
1284 panda_file::ClassDataAccessor classAccessor {*file_, recordId};
1285 std::stringstream ss;
1286
1287 ss << "offset: 0x" << std::setfill('0') << std::setw(DEFAULT_OFFSET_WIDTH) << std::hex << classAccessor.GetClassId()
1288 << ", size: 0x" << std::setfill('0') << std::setw(DEFAULT_OFFSET_WIDTH) << classAccessor.GetSize() << " ("
1289 << std::dec << classAccessor.GetSize() << ")";
1290
1291 recordInfo->recordInfo = ss.str();
1292 ss.str(std::string());
1293
1294 classAccessor.EnumerateFields([&](panda_file::FieldDataAccessor &fieldAccessor) -> void {
1295 GetFieldInfo(fieldAccessor, ss);
1296
1297 recordInfo->fieldsInfo.push_back(ss.str());
1298
1299 ss.str(std::string());
1300 });
1301 }
1302
GetMethodInfo(const panda_file::File::EntityId & methodId,MethodInfo * methodInfo) const1303 void Disassembler::GetMethodInfo(const panda_file::File::EntityId &methodId, MethodInfo *methodInfo) const
1304 {
1305 panda_file::MethodDataAccessor methodAccessor {*file_, methodId};
1306 std::stringstream ss;
1307
1308 ss << "offset: 0x" << std::setfill('0') << std::setw(DEFAULT_OFFSET_WIDTH) << std::hex
1309 << methodAccessor.GetMethodId();
1310
1311 if (methodAccessor.GetCodeId().has_value()) {
1312 ss << ", code offset: 0x" << std::setfill('0') << std::setw(DEFAULT_OFFSET_WIDTH) << std::hex
1313 << methodAccessor.GetCodeId().value();
1314
1315 GetInsInfo(methodAccessor, methodAccessor.GetCodeId().value(), methodInfo);
1316 } else {
1317 ss << ", <no code>";
1318 }
1319
1320 auto profileSize = methodAccessor.GetProfileSize();
1321 if (profileSize) {
1322 ss << ", profile size: " << profileSize.value();
1323 }
1324
1325 methodInfo->methodInfo = ss.str();
1326
1327 if (methodAccessor.GetCodeId()) {
1328 ASSERT(debugInfoExtractor_ != nullptr);
1329 methodInfo->lineNumberTable = debugInfoExtractor_->GetLineNumberTable(methodId);
1330 methodInfo->localVariableTable = debugInfoExtractor_->GetLocalVariableTable(methodId);
1331
1332 // Add information about parameters into the table
1333 panda_file::CodeDataAccessor codeda(*file_, methodAccessor.GetCodeId().value());
1334 auto argIdx = static_cast<int32_t>(codeda.GetNumVregs());
1335 uint32_t codeSize = codeda.GetCodeSize();
1336 for (const auto &info : debugInfoExtractor_->GetParameterInfo(methodId)) {
1337 panda_file::LocalVariableInfo argInfo {info.name, info.signature, "", argIdx++, 0, codeSize};
1338 methodInfo->localVariableTable.emplace_back(argInfo);
1339 }
1340 }
1341 }
1342
Serialize(const std::string & name,const pandasm::LiteralArray & litArray,std::ostream & os) const1343 void Disassembler::Serialize(const std::string &name, const pandasm::LiteralArray &litArray, std::ostream &os) const
1344 {
1345 if (litArray.literals.empty()) {
1346 return;
1347 }
1348
1349 bool isConst = litArray.literals[0].IsArray();
1350
1351 std::stringstream specifiers {};
1352
1353 if (isConst) {
1354 specifiers << LiteralTagToString(litArray.literals[0].tag) << " " << litArray.literals.size() << " ";
1355 }
1356
1357 os << ".array array_" << name << " " << specifiers.str() << "{";
1358
1359 SerializeValues(litArray, isConst, os);
1360
1361 os << "}\n";
1362 }
1363
LiteralTagToString(const panda_file::LiteralTag & tag) const1364 std::string Disassembler::LiteralTagToString(const panda_file::LiteralTag &tag) const
1365 {
1366 switch (tag) {
1367 case panda_file::LiteralTag::BOOL:
1368 case panda_file::LiteralTag::ARRAY_U1:
1369 return "u1";
1370 case panda_file::LiteralTag::ARRAY_U8:
1371 return "u8";
1372 case panda_file::LiteralTag::ARRAY_I8:
1373 return "i8";
1374 case panda_file::LiteralTag::ARRAY_U16:
1375 return "u16";
1376 case panda_file::LiteralTag::ARRAY_I16:
1377 return "i16";
1378 case panda_file::LiteralTag::ARRAY_U32:
1379 return "u32";
1380 case panda_file::LiteralTag::INTEGER:
1381 case panda_file::LiteralTag::ARRAY_I32:
1382 return "i32";
1383 case panda_file::LiteralTag::ARRAY_U64:
1384 return "u64";
1385 case panda_file::LiteralTag::BIGINT:
1386 case panda_file::LiteralTag::ARRAY_I64:
1387 return "i64";
1388 case panda_file::LiteralTag::FLOAT:
1389 case panda_file::LiteralTag::ARRAY_F32:
1390 return "f32";
1391 case panda_file::LiteralTag::DOUBLE:
1392 case panda_file::LiteralTag::ARRAY_F64:
1393 return "f64";
1394 case panda_file::LiteralTag::STRING:
1395 case panda_file::LiteralTag::ARRAY_STRING:
1396 return pandasm::Type::FromDescriptor(panda_file::GetStringClassDescriptor(fileLanguage_)).GetPandasmName();
1397 case panda_file::LiteralTag::ACCESSOR:
1398 return "accessor";
1399 case panda_file::LiteralTag::NULLVALUE:
1400 return "nullvalue";
1401 case panda_file::LiteralTag::METHODAFFILIATE:
1402 return "method_affiliate";
1403 case panda_file::LiteralTag::METHOD:
1404 return "method";
1405 case panda_file::LiteralTag::GENERATORMETHOD:
1406 return "generator_method";
1407 case panda_file::LiteralTag::LITERALARRAY:
1408 return "lit_offset";
1409 default:
1410 LOG(ERROR, DISASSEMBLER) << "Unsupported literal with tag 0x" << std::hex << static_cast<uint32_t>(tag);
1411 UNREACHABLE();
1412 }
1413 }
1414
SerializeLiterals(const pandasm::LiteralArray::Literal & lit) const1415 std::string Disassembler::SerializeLiterals(const pandasm::LiteralArray::Literal &lit) const
1416 {
1417 std::stringstream res {};
1418 const auto &val = lit.value;
1419 switch (lit.tag) {
1420 case panda_file::LiteralTag::BOOL: {
1421 res << (std::get<bool>(val));
1422 break;
1423 }
1424 case panda_file::LiteralTag::INTEGER: {
1425 res << (bit_cast<int32_t>(std::get<uint32_t>(val)));
1426 break;
1427 }
1428 case panda_file::LiteralTag::DOUBLE: {
1429 res << (std::get<double>(val));
1430 break;
1431 }
1432 case panda_file::LiteralTag::STRING: {
1433 res << "\"" << (std::get<std::string>(val)) << "\"";
1434 break;
1435 }
1436 case panda_file::LiteralTag::METHOD:
1437 case panda_file::LiteralTag::GENERATORMETHOD: {
1438 res << (std::get<std::string>(val));
1439 break;
1440 }
1441 case panda_file::LiteralTag::NULLVALUE:
1442 case panda_file::LiteralTag::ACCESSOR: {
1443 res << (static_cast<int16_t>(bit_cast<int8_t>(std::get<uint8_t>(val))));
1444 break;
1445 }
1446 case panda_file::LiteralTag::METHODAFFILIATE: {
1447 res << (std::get<uint16_t>(val));
1448 break;
1449 }
1450 case panda_file::LiteralTag::LITERALARRAY: {
1451 res << (std::get<std::string>(val));
1452 break;
1453 }
1454 default:
1455 UNREACHABLE();
1456 }
1457 res << ", ";
1458 return res.str();
1459 }
1460
LiteralValueToString(const pandasm::LiteralArray::Literal & lit) const1461 std::string Disassembler::LiteralValueToString(const pandasm::LiteralArray::Literal &lit) const
1462 {
1463 if (lit.IsBoolValue()) {
1464 std::stringstream res {};
1465 res << (std::get<bool>(lit.value));
1466 return res.str();
1467 }
1468
1469 if (lit.IsByteValue()) {
1470 return LiteralIntegralValueToString<uint8_t>(lit);
1471 }
1472
1473 if (lit.IsShortValue()) {
1474 return LiteralIntegralValueToString<uint16_t>(lit);
1475 }
1476
1477 if (lit.IsIntegerValue()) {
1478 return LiteralIntegralValueToString<uint32_t>(lit);
1479 }
1480
1481 if (lit.IsLongValue()) {
1482 return LiteralIntegralValueToString<uint64_t>(lit);
1483 }
1484
1485 if (lit.IsDoubleValue()) {
1486 std::stringstream res {};
1487 res << std::get<double>(lit.value);
1488 return res.str();
1489 }
1490
1491 if (lit.IsFloatValue()) {
1492 std::stringstream res {};
1493 res << std::get<float>(lit.value);
1494 return res.str();
1495 }
1496
1497 if (lit.IsStringValue()) {
1498 std::stringstream res {};
1499 res << "\"" << std::get<std::string>(lit.value) << "\"";
1500 return res.str();
1501 }
1502
1503 if (lit.IsLiteralArrayValue()) {
1504 return SerializeLiterals(lit);
1505 }
1506
1507 UNREACHABLE();
1508 }
1509
SerializeValues(const pandasm::LiteralArray & litArray,const bool isConst,std::ostream & os) const1510 void Disassembler::SerializeValues(const pandasm::LiteralArray &litArray, const bool isConst, std::ostream &os) const
1511 {
1512 std::string separator = (isConst) ? (" ") : ("\n");
1513
1514 os << separator;
1515
1516 if (isConst) {
1517 for (const auto &l : litArray.literals) {
1518 os << LiteralValueToString(l) << separator;
1519 }
1520 } else {
1521 for (const auto &l : litArray.literals) {
1522 os << "\t" << LiteralTagToString(l.tag) << " " << LiteralValueToString(l) << separator;
1523 }
1524 }
1525 }
1526
Serialize(const pandasm::Record & record,std::ostream & os,bool printInformation) const1527 void Disassembler::Serialize(const pandasm::Record &record, std::ostream &os, bool printInformation) const
1528 {
1529 if (IsSystemType(record.name)) {
1530 return;
1531 }
1532
1533 os << ".record " << record.name;
1534
1535 const auto recordIter = progAnn_.recordAnnotations.find(record.name);
1536 const bool recordInTable = recordIter != progAnn_.recordAnnotations.end();
1537 if (recordInTable) {
1538 Serialize(*record.metadata, recordIter->second.annList, os);
1539 } else {
1540 Serialize(*record.metadata, {}, os);
1541 }
1542
1543 if (record.metadata->IsForeign() && record.fieldList.empty()) {
1544 os << "\n\n";
1545 return;
1546 }
1547
1548 os << " {";
1549
1550 if (printInformation && progInfo_.recordsInfo.find(record.name) != progInfo_.recordsInfo.end()) {
1551 os << " # " << progInfo_.recordsInfo.at(record.name).recordInfo << "\n";
1552 SerializeFields(record, os, true);
1553 } else {
1554 os << "\n";
1555 SerializeFields(record, os, false);
1556 }
1557
1558 os << "}\n\n";
1559 }
1560
DumpLiteralArray(const pandasm::LiteralArray & literalArray,std::stringstream & ss) const1561 void Disassembler::DumpLiteralArray(const pandasm::LiteralArray &literalArray, std::stringstream &ss) const
1562 {
1563 ss << "[";
1564 bool firstItem = true;
1565 for (const auto &item : literalArray.literals) {
1566 if (!firstItem) {
1567 ss << ", ";
1568 } else {
1569 firstItem = false;
1570 }
1571
1572 switch (item.tag) {
1573 case panda_file::LiteralTag::INTEGER: {
1574 ss << std::get<uint32_t>(item.value); // CC-OFF(G.EXP.30-CPP) false positive
1575 break;
1576 }
1577 case panda_file::LiteralTag::DOUBLE: {
1578 ss << std::get<double>(item.value);
1579 break;
1580 }
1581 case panda_file::LiteralTag::BOOL: {
1582 ss << std::get<bool>(item.value);
1583 break;
1584 }
1585 case panda_file::LiteralTag::STRING: {
1586 ss << "\"" << std::get<std::string>(item.value) << "\"";
1587 break;
1588 }
1589 case panda_file::LiteralTag::LITERALARRAY: {
1590 std::string offsetStr = std::get<std::string>(item.value);
1591 const int hexBase = 16;
1592 const char *begin = offsetStr.data();
1593 const char *end = &(*offsetStr.end());
1594 uint32_t litArrayOffset = 0;
1595 std::from_chars(begin, end, litArrayOffset, hexBase);
1596 pandasm::LiteralArray litArray;
1597 GetLiteralArrayByOffset(&litArray, panda_file::File::EntityId(litArrayOffset));
1598 DumpLiteralArray(litArray, ss);
1599 break;
1600 }
1601 default: {
1602 UNREACHABLE();
1603 break;
1604 }
1605 }
1606 }
1607 ss << "]";
1608 }
1609
SerializeFieldValue(const pandasm::Field & f,std::stringstream & ss) const1610 void Disassembler::SerializeFieldValue(const pandasm::Field &f, std::stringstream &ss) const
1611 {
1612 if (f.type.GetId() == panda_file::Type::TypeId::U32) {
1613 ss << " = 0x" << std::hex << f.metadata->GetValue().value().GetValue<uint32_t>();
1614 } else if (f.type.GetId() == panda_file::Type::TypeId::U8) {
1615 ss << " = 0x" << std::hex << static_cast<uint32_t>(f.metadata->GetValue().value().GetValue<uint8_t>());
1616 } else if (f.type.GetId() == panda_file::Type::TypeId::I8) {
1617 ss << " = 0x" << std::hex << static_cast<int32_t>(f.metadata->GetValue().value().GetValue<int8_t>());
1618 } else if (f.type.GetId() == panda_file::Type::TypeId::F64) {
1619 ss << " = " << static_cast<double>(f.metadata->GetValue().value().GetValue<double>());
1620 } else if (f.type.GetId() == panda_file::Type::TypeId::U1) {
1621 ss << " = " << static_cast<bool>(f.metadata->GetValue().value().GetValue<bool>());
1622 } else if (f.type.GetId() == panda_file::Type::TypeId::I32) {
1623 ss << " = " << f.metadata->GetValue().value().GetValue<int>();
1624 } else if (f.type.GetId() == panda_file::Type::TypeId::REFERENCE && f.type.GetName() == "std/core/String") {
1625 ss << " = \"" << static_cast<std::string>(f.metadata->GetValue().value().GetValue<std::string>()) << "\"";
1626 } else if (f.type.GetRank() > 0) {
1627 uint32_t litArrayOffset = 0;
1628 auto value = f.metadata->GetValue().value().GetValue<std::string>();
1629 std::from_chars(value.data(), &(*value.end()), litArrayOffset);
1630 pandasm::LiteralArray litArray;
1631 GetLiteralArrayByOffset(&litArray, panda_file::File::EntityId(litArrayOffset));
1632 ss << " = ";
1633 DumpLiteralArray(litArray, ss);
1634 }
1635 }
1636
SerializeFields(const pandasm::Record & record,std::ostream & os,bool printInformation) const1637 void Disassembler::SerializeFields(const pandasm::Record &record, std::ostream &os, bool printInformation) const
1638 {
1639 constexpr size_t INFO_OFFSET = 80;
1640
1641 const auto recordIter = progAnn_.recordAnnotations.find(record.name);
1642 const bool recordInTable = recordIter != progAnn_.recordAnnotations.end();
1643
1644 const auto recInf = (printInformation) ? (progInfo_.recordsInfo.at(record.name)) : (RecordInfo {});
1645
1646 size_t fieldIdx = 0;
1647
1648 std::stringstream ss;
1649 for (const auto &f : record.fieldList) {
1650 ss << "\t" << f.type.GetPandasmName() << " " << f.name;
1651 if (f.metadata->GetValue().has_value()) {
1652 SerializeFieldValue(f, ss);
1653 }
1654 if (recordInTable) {
1655 const auto fieldIter = recordIter->second.fieldAnnotations.find(f.name);
1656 if (fieldIter != recordIter->second.fieldAnnotations.end()) {
1657 Serialize(*f.metadata, fieldIter->second, ss);
1658 } else {
1659 Serialize(*f.metadata, {}, ss);
1660 }
1661 } else {
1662 Serialize(*f.metadata, {}, ss);
1663 }
1664
1665 if (printInformation) {
1666 os << std::setw(INFO_OFFSET) << std::left << ss.str() << " # " << recInf.fieldsInfo.at(fieldIdx) << "\n";
1667 } else {
1668 os << ss.str() << "\n";
1669 }
1670
1671 ss.str(std::string());
1672 ss.clear();
1673
1674 fieldIdx++;
1675 }
1676 }
1677
Serialize(const pandasm::Function::CatchBlock & catchBlock,std::ostream & os) const1678 void Disassembler::Serialize(const pandasm::Function::CatchBlock &catchBlock, std::ostream &os) const
1679 {
1680 if (catchBlock.exceptionRecord.empty()) {
1681 os << ".catchall ";
1682 } else {
1683 os << ".catch " << catchBlock.exceptionRecord << ", ";
1684 }
1685
1686 os << catchBlock.tryBeginLabel << ", " << catchBlock.tryEndLabel << ", " << catchBlock.catchBeginLabel;
1687
1688 if (!catchBlock.catchEndLabel.empty()) {
1689 os << ", " << catchBlock.catchEndLabel;
1690 }
1691 }
1692
Serialize(const pandasm::ItemMetadata & meta,const AnnotationList & annList,std::ostream & os) const1693 void Disassembler::Serialize(const pandasm::ItemMetadata &meta, const AnnotationList &annList, std::ostream &os) const
1694 {
1695 auto boolAttributes = meta.GetBoolAttributes();
1696 auto attributes = meta.GetAttributes();
1697 if (boolAttributes.empty() && attributes.empty() && annList.empty()) {
1698 return;
1699 }
1700
1701 os << " <";
1702
1703 size_t size = boolAttributes.size();
1704 size_t idx = 0;
1705 for (const auto &attr : boolAttributes) {
1706 os << attr;
1707 ++idx;
1708
1709 if (!attributes.empty() || !annList.empty() || idx < size) {
1710 os << ", ";
1711 }
1712 }
1713
1714 size = attributes.size();
1715 idx = 0;
1716 for (const auto &[key, values] : attributes) {
1717 for (size_t i = 0; i < values.size(); i++) {
1718 os << key << "=" << values[i];
1719
1720 if (i < values.size() - 1) {
1721 os << ", ";
1722 }
1723 }
1724
1725 ++idx;
1726
1727 if (!annList.empty() || idx < size) {
1728 os << ", ";
1729 }
1730 }
1731
1732 size = annList.size();
1733 idx = 0;
1734 for (const auto &[key, value] : annList) {
1735 os << key << "=" << value;
1736
1737 ++idx;
1738
1739 if (idx < size) {
1740 os << ", ";
1741 }
1742 }
1743
1744 os << ">";
1745 }
1746
SerializeLineNumberTable(const panda_file::LineNumberTable & lineNumberTable,std::ostream & os) const1747 void Disassembler::SerializeLineNumberTable(const panda_file::LineNumberTable &lineNumberTable, std::ostream &os) const
1748 {
1749 if (lineNumberTable.empty()) {
1750 return;
1751 }
1752
1753 os << "\n# LINE_NUMBER_TABLE:\n";
1754 for (const auto &lineInfo : lineNumberTable) {
1755 os << "#\tline " << lineInfo.line << ": " << lineInfo.offset << "\n";
1756 }
1757 }
1758
SerializeLocalVariableTable(const panda_file::LocalVariableTable & localVariableTable,const pandasm::Function & method,std::ostream & os) const1759 void Disassembler::SerializeLocalVariableTable(const panda_file::LocalVariableTable &localVariableTable,
1760 const pandasm::Function &method, std::ostream &os) const
1761 {
1762 if (localVariableTable.empty()) {
1763 return;
1764 }
1765
1766 os << "\n# LOCAL_VARIABLE_TABLE:\n";
1767 os << "#\t Start End Register Name Signature\n";
1768 const int startWidth = 5;
1769 const int endWidth = 4;
1770 const int regWidth = 8;
1771 const int nameWidth = 14;
1772 for (const auto &variableInfo : localVariableTable) {
1773 std::ostringstream regStream;
1774 regStream << variableInfo.regNumber << '(';
1775 if (variableInfo.regNumber < 0) {
1776 regStream << "acc";
1777 } else {
1778 uint32_t vreg = variableInfo.regNumber;
1779 uint32_t firstArgReg = method.GetTotalRegs();
1780 if (vreg < firstArgReg) {
1781 regStream << 'v' << vreg;
1782 } else {
1783 regStream << 'a' << vreg - firstArgReg;
1784 }
1785 }
1786 regStream << ')';
1787
1788 os << "#\t " << std::setw(startWidth) << std::right << variableInfo.startOffset << " ";
1789 os << std::setw(endWidth) << std::right << variableInfo.endOffset << " ";
1790 os << std::setw(regWidth) << std::right << regStream.str() << " ";
1791 os << std::setw(nameWidth) << std::right << variableInfo.name << " " << variableInfo.type;
1792 if (!variableInfo.typeSignature.empty() && variableInfo.typeSignature != variableInfo.type) {
1793 os << " (" << variableInfo.typeSignature << ")";
1794 }
1795 os << "\n";
1796 }
1797 }
1798
SerializeLanguage(std::ostream & os) const1799 void Disassembler::SerializeLanguage(std::ostream &os) const
1800 {
1801 os << ".language " << ark::panda_file::LanguageToString(fileLanguage_) << "\n\n";
1802 }
1803
SerializeFilename(std::ostream & os) const1804 void Disassembler::SerializeFilename(std::ostream &os) const
1805 {
1806 if (file_ == nullptr || file_->GetFilename().empty()) {
1807 return;
1808 }
1809
1810 os << "# source binary: " << file_->GetFilename() << "\n\n";
1811 }
1812
SerializeLitArrays(std::ostream & os,bool addSeparators) const1813 void Disassembler::SerializeLitArrays(std::ostream &os, bool addSeparators) const
1814 {
1815 LOG(DEBUG, DISASSEMBLER) << "[serializing literals]";
1816
1817 if (prog_.literalarrayTable.empty()) {
1818 return;
1819 }
1820
1821 if (addSeparators) {
1822 os << "# ====================\n"
1823 "# LITERALS\n\n";
1824 }
1825
1826 for (const auto &pair : prog_.literalarrayTable) {
1827 Serialize(pair.first, pair.second, os);
1828 }
1829
1830 os << "\n";
1831 }
1832
SerializeRecords(std::ostream & os,bool addSeparators,bool printInformation) const1833 void Disassembler::SerializeRecords(std::ostream &os, bool addSeparators, bool printInformation) const
1834 {
1835 LOG(DEBUG, DISASSEMBLER) << "[serializing records]";
1836
1837 if (prog_.recordTable.empty()) {
1838 return;
1839 }
1840
1841 if (addSeparators) {
1842 os << "# ====================\n"
1843 "# RECORDS\n\n";
1844 }
1845
1846 for (const auto &r : prog_.recordTable) {
1847 Serialize(r.second, os, printInformation);
1848 }
1849 }
1850
SerializeMethods(std::ostream & os,bool addSeparators,bool printInformation) const1851 void Disassembler::SerializeMethods(std::ostream &os, bool addSeparators, bool printInformation) const
1852 {
1853 LOG(DEBUG, DISASSEMBLER) << "[serializing methods]";
1854
1855 if (prog_.functionInstanceTable.empty() && prog_.functionStaticTable.empty()) {
1856 return;
1857 }
1858
1859 if (addSeparators) {
1860 os << "# ====================\n"
1861 "# METHODS\n\n";
1862 }
1863
1864 for (const auto &m : prog_.functionStaticTable) {
1865 Serialize(m.second, os, printInformation);
1866 }
1867 for (const auto &m : prog_.functionInstanceTable) {
1868 Serialize(m.second, os, printInformation);
1869 }
1870 }
1871
BytecodeOpcodeToPandasmOpcode(uint8_t o) const1872 pandasm::Opcode Disassembler::BytecodeOpcodeToPandasmOpcode(uint8_t o) const
1873 {
1874 return BytecodeOpcodeToPandasmOpcode(BytecodeInstruction::Opcode(o));
1875 }
1876
IDToString(BytecodeInstruction bcIns,panda_file::File::EntityId methodId) const1877 std::string Disassembler::IDToString(BytecodeInstruction bcIns, panda_file::File::EntityId methodId) const
1878 {
1879 std::stringstream name;
1880
1881 if (bcIns.HasFlag(BytecodeInstruction::Flags::TYPE_ID)) {
1882 auto idx = bcIns.GetId().AsIndex();
1883 auto id = file_->ResolveClassIndex(methodId, idx);
1884 auto type = pandasm::Type::FromDescriptor(StringDataToString(file_->GetStringData(id)));
1885
1886 name.str("");
1887 name << type.GetPandasmName();
1888 } else if (bcIns.HasFlag(BytecodeInstruction::Flags::METHOD_ID) ||
1889 bcIns.HasFlag(BytecodeInstruction::Flags::STATIC_METHOD_ID)) {
1890 auto idx = bcIns.GetId().AsIndex();
1891 auto id = file_->ResolveMethodIndex(methodId, idx);
1892
1893 name << GetMethodSignature(id);
1894 } else if (bcIns.HasFlag(BytecodeInstruction::Flags::STRING_ID)) {
1895 name << '\"';
1896
1897 if (skipStrings_ || quiet_) {
1898 name << std::hex << "0x" << bcIns.GetId().AsFileId();
1899 } else {
1900 name << StringDataToString(file_->GetStringData(bcIns.GetId().AsFileId()));
1901 }
1902
1903 name << '\"';
1904 } else if (bcIns.HasFlag(BytecodeInstruction::Flags::FIELD_ID) ||
1905 bcIns.HasFlag(BytecodeInstruction::Flags::STATIC_FIELD_ID)) {
1906 auto idx = bcIns.GetId().AsIndex();
1907 auto id = file_->ResolveFieldIndex(methodId, idx);
1908 panda_file::FieldDataAccessor fieldAccessor(*file_, id);
1909
1910 auto recordName = GetFullRecordName(fieldAccessor.GetClassId());
1911 name << recordName << '.';
1912 name << StringDataToString(file_->GetStringData(fieldAccessor.GetNameId()));
1913 } else if (bcIns.HasFlag(BytecodeInstruction::Flags::LITERALARRAY_ID)) {
1914 auto index = bcIns.GetId().AsIndex();
1915 name << "array_" << index;
1916 }
1917
1918 return name.str();
1919 }
1920
GetRecordLanguage(panda_file::File::EntityId classId) const1921 ark::panda_file::SourceLang Disassembler::GetRecordLanguage(panda_file::File::EntityId classId) const
1922 {
1923 if (file_->IsExternal(classId)) {
1924 return ark::panda_file::SourceLang::PANDA_ASSEMBLY;
1925 }
1926
1927 panda_file::ClassDataAccessor cda(*file_, classId);
1928 return cda.GetSourceLang().value_or(panda_file::SourceLang::PANDA_ASSEMBLY);
1929 }
1930
1931 // CC-OFFNXT(G.FUN.01) solid logic
TranslateImmToLabel(pandasm::Ins * paIns,LabelTable * labelTable,const uint8_t * insArr,BytecodeInstruction bcIns,BytecodeInstruction bcInsLast,panda_file::File::EntityId codeId)1932 static void TranslateImmToLabel(pandasm::Ins *paIns, LabelTable *labelTable, const uint8_t *insArr,
1933 BytecodeInstruction bcIns, BytecodeInstruction bcInsLast,
1934 panda_file::File::EntityId codeId)
1935 {
1936 const int32_t jmpOffset = std::get<int64_t>(paIns->imms.at(0));
1937 const auto bcInsDest = bcIns.JumpTo(jmpOffset);
1938 if (bcInsLast.GetAddress() > bcInsDest.GetAddress()) {
1939 size_t idx = GetBytecodeInstructionNumber(BytecodeInstruction(insArr), bcInsDest);
1940 if (idx != std::numeric_limits<size_t>::max()) {
1941 if (labelTable->find(idx) == labelTable->end()) {
1942 std::stringstream ss;
1943 ss << "jump_label_" << labelTable->size();
1944 (*labelTable)[idx] = ss.str();
1945 }
1946
1947 paIns->imms.clear();
1948 paIns->ids.push_back(labelTable->at(idx));
1949 } else {
1950 LOG(ERROR, DISASSEMBLER) << "> error encountered at " << codeId << " (0x" << std::hex << codeId
1951 << "). incorrect instruction at offset: 0x" << (bcIns.GetAddress() - insArr)
1952 << ": invalid jump offset 0x" << jmpOffset
1953 << " - jumping in the middle of another instruction!";
1954 }
1955 } else {
1956 LOG(ERROR, DISASSEMBLER) << "> error encountered at " << codeId << " (0x" << std::hex << codeId
1957 << "). incorrect instruction at offset: 0x" << (bcIns.GetAddress() - insArr)
1958 << ": invalid jump offset 0x" << jmpOffset << " - jumping out of bounds!";
1959 }
1960 }
1961
CollectExternalFields(const panda_file::FieldDataAccessor & fieldAccessor)1962 void Disassembler::CollectExternalFields(const panda_file::FieldDataAccessor &fieldAccessor)
1963 {
1964 auto recordName = GetFullRecordName(fieldAccessor.GetClassId());
1965
1966 pandasm::Field field(fileLanguage_);
1967 GetField(field, fieldAccessor);
1968 if (field.name.empty()) {
1969 return;
1970 }
1971
1972 auto &fieldList = externalFieldTable_[recordName];
1973 auto retField = std::find_if(fieldList.begin(), fieldList.end(), [&field](pandasm::Field &fieldFromList) {
1974 return field.name == fieldFromList.name && field.IsStatic() == fieldFromList.IsStatic();
1975 });
1976 if (retField == fieldList.end()) {
1977 fieldList.emplace_back(std::move(field));
1978
1979 externalFieldsInfoTable_[recordName].emplace_back(GetFieldInfo(fieldAccessor));
1980 }
1981 }
1982
1983 // CC-OFFNXT(huge_method) solid logic
GetInstructions(pandasm::Function * method,panda_file::File::EntityId methodId,panda_file::File::EntityId codeId)1984 IdList Disassembler::GetInstructions(pandasm::Function *method, panda_file::File::EntityId methodId,
1985 panda_file::File::EntityId codeId)
1986 {
1987 panda_file::CodeDataAccessor codeAccessor(*file_, codeId);
1988
1989 const auto insArr = codeAccessor.GetInstructions();
1990
1991 method->regsNum = codeAccessor.GetNumVregs();
1992
1993 auto bcIns = BytecodeInstruction(insArr);
1994 auto from = bcIns.GetAddress();
1995 const auto bcInsLast = bcIns.JumpTo(codeAccessor.GetCodeSize());
1996
1997 LabelTable labelTable = GetExceptions(method, methodId, codeId);
1998
1999 IdList unknownExternalMethods {};
2000
2001 while (bcIns.GetAddress() != bcInsLast.GetAddress()) {
2002 if (bcIns.GetAddress() > bcInsLast.GetAddress()) {
2003 LOG(ERROR, DISASSEMBLER) << "> error encountered at " << codeId << " (0x" << std::hex << codeId
2004 << "). bytecode instructions sequence corrupted for method " << method->name
2005 << "! went out of bounds";
2006
2007 break;
2008 }
2009
2010 if (bcIns.HasFlag(BytecodeInstruction::Flags::FIELD_ID) ||
2011 bcIns.HasFlag(BytecodeInstruction::Flags::STATIC_FIELD_ID)) {
2012 auto idx = bcIns.GetId().AsIndex();
2013 auto id = file_->ResolveFieldIndex(methodId, idx);
2014 panda_file::FieldDataAccessor fieldAccessor(*file_, id);
2015
2016 if (fieldAccessor.IsExternal()) {
2017 CollectExternalFields(fieldAccessor);
2018 }
2019 }
2020
2021 auto paIns = BytecodeInstructionToPandasmInstruction(bcIns, methodId);
2022 paIns.insDebug.boundLeft =
2023 bcIns.GetAddress() - from; // It is used to produce a line table during method serialization
2024 if (paIns.IsJump()) {
2025 TranslateImmToLabel(&paIns, &labelTable, insArr, bcIns, bcInsLast, codeId);
2026 }
2027
2028 // check if method id is unknown external method. if so, emplace it in table
2029 if (bcIns.HasFlag(BytecodeInstruction::Flags::METHOD_ID) ||
2030 bcIns.HasFlag(BytecodeInstruction::Flags::STATIC_METHOD_ID)) {
2031 const auto argMethodIdx = bcIns.GetId().AsIndex();
2032 const auto argMethodId = file_->ResolveMethodIndex(methodId, argMethodIdx);
2033
2034 const auto argMethodSignature = GetMethodSignature(argMethodId);
2035 panda_file::MethodDataAccessor methodAccessor(*file_, argMethodId);
2036 const auto &functionTable =
2037 methodAccessor.IsStatic() ? prog_.functionStaticTable : prog_.functionInstanceTable;
2038 const bool isPresent = functionTable.find(argMethodSignature) != functionTable.cend();
2039 const bool isExternal = file_->IsExternal(argMethodId);
2040 if (isExternal && !isPresent) {
2041 unknownExternalMethods.push_back(argMethodId);
2042 }
2043 }
2044
2045 method->ins.push_back(paIns);
2046 bcIns = bcIns.GetNext();
2047 }
2048
2049 for (const auto &pair : labelTable) {
2050 method->ins[pair.first].label = pair.second;
2051 method->ins[pair.first].setLabel = true;
2052 }
2053
2054 return unknownExternalMethods;
2055 }
2056
2057 } // namespace ark::disasm
2058