1 /*
2 * Copyright (c) 2021-2024 Huawei Device Co., Ltd.
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at
6 *
7 * http://www.apache.org/licenses/LICENSE-2.0
8 *
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
14 */
15
16 #include "disassembler.h"
17 #include "class_data_accessor.h"
18 #include "field_data_accessor.h"
19 #include "libpandafile/type_helper.h"
20 #include "literal_data_accessor.h"
21 #include "mangling.h"
22 #include "utils/logger.h"
23
24 #include <cstdint>
25 #include <iomanip>
26
27 #include "get_language_specific_metadata.inc"
28
29 namespace ark::disasm {
30
Disassemble(std::string_view filenameIn,const bool quiet,const bool skipStrings)31 void Disassembler::Disassemble(std::string_view filenameIn, const bool quiet, const bool skipStrings)
32 {
33 auto file = panda_file::File::Open(filenameIn);
34 if (file == nullptr) {
35 LOG(FATAL, DISASSEMBLER) << "> unable to open specified pandafile: <" << filenameIn << ">";
36 }
37
38 Disassemble(file, quiet, skipStrings);
39 }
40
Disassemble(const panda_file::File & file,const bool quiet,const bool skipStrings)41 void Disassembler::Disassemble(const panda_file::File &file, const bool quiet, const bool skipStrings)
42 {
43 SetFile(file);
44 DisassembleImpl(quiet, skipStrings);
45 }
46
Disassemble(std::unique_ptr<const panda_file::File> & file,const bool quiet,const bool skipStrings)47 void Disassembler::Disassemble(std::unique_ptr<const panda_file::File> &file, const bool quiet, const bool skipStrings)
48 {
49 SetFile(file);
50 DisassembleImpl(quiet, skipStrings);
51 }
52
DisassembleImpl(const bool quiet,const bool skipStrings)53 void Disassembler::DisassembleImpl(const bool quiet, const bool skipStrings)
54 {
55 prog_ = pandasm::Program {};
56
57 recordNameToId_.clear();
58 methodNameToId_.clear();
59
60 skipStrings_ = skipStrings;
61 quiet_ = quiet;
62
63 progInfo_ = ProgInfo {};
64
65 progAnn_ = ProgAnnotations {};
66
67 GetLiteralArrays();
68 GetRecords();
69
70 AddExternalFieldsToRecords();
71 GetLanguageSpecificMetadata();
72 }
73
SetFile(std::unique_ptr<const panda_file::File> & file)74 void Disassembler::SetFile(std::unique_ptr<const panda_file::File> &file)
75 {
76 fileHolder_.swap(file);
77 file_ = fileHolder_.get();
78 }
79
SetFile(const panda_file::File & file)80 void Disassembler::SetFile(const panda_file::File &file)
81 {
82 fileHolder_.reset();
83 file_ = &file;
84 }
85
SetProfile(std::string_view fname)86 void Disassembler::SetProfile(std::string_view fname)
87 {
88 std::ifstream stm(fname.data(), std::ios::binary);
89 if (!stm.is_open()) {
90 LOG(FATAL, DISASSEMBLER) << "Cannot open profile file";
91 }
92
93 auto res = profiling::ReadProfile(stm, fileLanguage_);
94 if (!res) {
95 LOG(FATAL, DISASSEMBLER) << "Failed to deserialize: " << res.Error();
96 }
97 profile_ = res.Value();
98 }
99
GetInsInfo(panda_file::MethodDataAccessor & mda,const panda_file::File::EntityId & codeId,MethodInfo * methodInfo) const100 void Disassembler::GetInsInfo(panda_file::MethodDataAccessor &mda, const panda_file::File::EntityId &codeId,
101 MethodInfo *methodInfo /* out */) const
102 {
103 const static size_t FORMAT_WIDTH = 20;
104 const static size_t INSTRUCTION_WIDTH = 2;
105
106 panda_file::CodeDataAccessor codeAccessor(*file_, codeId);
107
108 std::string methodName = mda.GetFullName();
109 auto prof = profiling::INVALID_PROFILE;
110 if (profile_ != profiling::INVALID_PROFILE) {
111 prof = profiling::FindMethodInProfile(profile_, fileLanguage_, methodName);
112 }
113
114 auto insSz = codeAccessor.GetCodeSize();
115 auto insArr = codeAccessor.GetInstructions();
116
117 auto bcIns = BytecodeInstruction(insArr);
118 auto bcInsLast = bcIns.JumpTo(insSz);
119
120 while (bcIns.GetAddress() != bcInsLast.GetAddress()) {
121 std::stringstream ss;
122
123 uintptr_t bc = bcIns.GetAddress() - BytecodeInstruction(insArr).GetAddress();
124 ss << "offset: 0x" << std::setfill('0') << std::setw(4U) << std::hex << bc;
125 ss << ", " << std::setfill('.');
126
127 BytecodeInstruction::Format format = bcIns.GetFormat();
128
129 auto formatStr = std::string("[") + BytecodeInstruction::GetFormatString(format) + ']';
130 ss << std::setw(FORMAT_WIDTH) << std::left << formatStr;
131
132 ss << "[";
133
134 const uint8_t *pc = bcIns.GetAddress();
135 const size_t sz = bcIns.GetSize();
136
137 for (size_t i = 0; i < sz; i++) {
138 ss << "0x" << std::setw(INSTRUCTION_WIDTH) << std::setfill('0') << std::right << std::hex
139 << static_cast<int>(pc[i]); // NOLINT(cppcoreguidelines-pro-bounds-pointer-arithmetic)
140
141 if (i != sz - 1) {
142 ss << " ";
143 }
144 }
145
146 ss << "]";
147
148 if (profile_ != profiling::INVALID_PROFILE && prof != profiling::INVALID_PROFILE) {
149 auto profId = bcIns.GetProfileId();
150 if (profId != -1) {
151 ss << ", Profile: ";
152 profiling::DumpProfile(prof, fileLanguage_, &bcIns, ss);
153 }
154 }
155
156 methodInfo->instructionsInfo.push_back(ss.str());
157
158 bcIns = bcIns.GetNext();
159 }
160 }
161
CollectInfo()162 void Disassembler::CollectInfo()
163 {
164 LOG(DEBUG, DISASSEMBLER) << "\n[getting program info]\n";
165
166 debugInfoExtractor_ = std::make_unique<panda_file::DebugInfoExtractor>(file_);
167
168 for (const auto &pair : recordNameToId_) {
169 GetRecordInfo(pair.second, &progInfo_.recordsInfo[pair.first]);
170 }
171
172 for (const auto &pair : methodNameToId_) {
173 GetMethodInfo(pair.second, &progInfo_.methodsInfo[pair.first]);
174 }
175
176 AddExternalFieldsInfoToRecords();
177 }
178
Serialize(std::ostream & os,bool addSeparators,bool printInformation) const179 void Disassembler::Serialize(std::ostream &os, bool addSeparators, bool printInformation) const
180 {
181 if (os.bad()) {
182 LOG(DEBUG, DISASSEMBLER) << "> serialization failed. os bad\n";
183
184 return;
185 }
186
187 SerializeFilename(os);
188 SerializeLanguage(os);
189 SerializeLitArrays(os, addSeparators);
190 SerializeRecords(os, addSeparators, printInformation);
191 SerializeMethods(os, addSeparators, printInformation);
192 }
193
SerializePrintStartInfo(const pandasm::Function & method,std::ostringstream & headerSs) const194 void Disassembler::SerializePrintStartInfo(const pandasm::Function &method, std::ostringstream &headerSs) const
195 {
196 headerSs << ".function " << method.returnType.GetPandasmName() << " " << method.name << "(";
197
198 if (!method.params.empty()) {
199 headerSs << method.params[0].type.GetPandasmName() << " a0";
200
201 for (size_t i = 1; i < method.params.size(); i++) {
202 headerSs << ", " << method.params[i].type.GetPandasmName() << " a" << (size_t)i;
203 }
204 }
205 headerSs << ")";
206 }
207
SerializeCheckEnd(const pandasm::Function & method,std::ostream & os,bool printMethodInfo,const MethodInfo * & methodInfo) const208 void Disassembler::SerializeCheckEnd(const pandasm::Function &method, std::ostream &os, bool printMethodInfo,
209 const MethodInfo *&methodInfo) const
210 {
211 if (!method.catchBlocks.empty()) {
212 os << "\n";
213
214 for (const auto &catchBlock : method.catchBlocks) {
215 Serialize(catchBlock, os);
216 os << "\n";
217 }
218 }
219
220 if (printMethodInfo) {
221 ASSERT(methodInfo != nullptr);
222 SerializeLineNumberTable(methodInfo->lineNumberTable, os);
223 SerializeLocalVariableTable(methodInfo->localVariableTable, method, os);
224 }
225
226 os << "}\n\n";
227 }
228
SerializeIfPrintMethodInfo(const pandasm::Function & method,bool printMethodInfo,std::ostringstream & headerSs,const MethodInfo * & methodInfo,std::map<std::string,ark::disasm::MethodInfo>::const_iterator & methodInfoIt) const229 size_t Disassembler::SerializeIfPrintMethodInfo(
230 const pandasm::Function &method, bool printMethodInfo, std::ostringstream &headerSs, const MethodInfo *&methodInfo,
231 std::map<std::string, ark::disasm::MethodInfo>::const_iterator &methodInfoIt) const
232 {
233 size_t width = 0;
234 if (printMethodInfo) {
235 methodInfo = &methodInfoIt->second;
236
237 for (const auto &i : method.ins) {
238 if (i.ToString().size() > width) {
239 width = i.ToString().size();
240 }
241 }
242
243 headerSs << " # " << methodInfo->methodInfo << "\n# CODE:";
244 }
245
246 headerSs << "\n";
247 return width;
248 }
249
Serialize(const pandasm::Function & method,std::ostream & os,bool printInformation,panda_file::LineNumberTable * lineTable) const250 void Disassembler::Serialize(const pandasm::Function &method, std::ostream &os, bool printInformation,
251 panda_file::LineNumberTable *lineTable) const
252 {
253 std::ostringstream headerSs;
254 SerializePrintStartInfo(method, headerSs);
255 const std::string signature = pandasm::GetFunctionSignatureFromName(method.name, method.params);
256 const auto methodIter = progAnn_.methodAnnotations.find(signature);
257 if (methodIter != progAnn_.methodAnnotations.end()) {
258 Serialize(*method.metadata, methodIter->second, headerSs);
259 } else {
260 Serialize(*method.metadata, {}, headerSs);
261 }
262
263 if (!method.HasImplementation()) {
264 headerSs << "\n\n";
265 os << headerSs.str();
266 return;
267 }
268
269 headerSs << " {";
270
271 const MethodInfo *methodInfo = nullptr;
272 auto methodInfoIt = progInfo_.methodsInfo.find(signature);
273 bool printMethodInfo = printInformation && methodInfoIt != progInfo_.methodsInfo.end();
274 size_t width = SerializeIfPrintMethodInfo(method, printMethodInfo, headerSs, methodInfo, methodInfoIt);
275
276 auto headerSsStr = headerSs.str();
277 size_t lineNumber = std::count(headerSsStr.begin(), headerSsStr.end(), '\n') + 1;
278
279 os << headerSsStr;
280
281 for (size_t i = 0; i < method.ins.size(); i++) {
282 std::ostringstream insSs;
283
284 std::string ins = method.ins[i].ToString("", method.GetParamsNum() != 0, method.regsNum);
285 if (method.ins[i].setLabel) {
286 insSs << ins.substr(0, ins.find(": ")) << ":\n";
287 ins.erase(0, ins.find(": ") + std::string(": ").length());
288 }
289
290 insSs << "\t";
291 if (printMethodInfo) {
292 insSs << std::setw(width) << std::left;
293 }
294 insSs << ins;
295 if (printMethodInfo) {
296 ASSERT(methodInfo != nullptr);
297 insSs << " # " << methodInfo->instructionsInfo[i];
298 }
299 insSs << "\n";
300
301 auto insSsStr = insSs.str();
302 lineNumber += std::count(insSsStr.begin(), insSsStr.end(), '\n');
303
304 if (lineTable != nullptr) {
305 lineTable->emplace_back(
306 panda_file::LineTableEntry {static_cast<uint32_t>(method.ins[i].insDebug.boundLeft), lineNumber - 1});
307 }
308
309 os << insSsStr;
310 }
311
312 SerializeCheckEnd(method, os, printMethodInfo, methodInfo);
313 }
314
IsSystemType(const std::string & typeName)315 inline bool Disassembler::IsSystemType(const std::string &typeName)
316 {
317 bool isArrayType = typeName.back() == ']';
318 bool isGlobal = typeName == "_GLOBAL";
319
320 return isArrayType || isGlobal;
321 }
322
GetRecord(pandasm::Record & record,const panda_file::File::EntityId & recordId)323 void Disassembler::GetRecord(pandasm::Record &record, const panda_file::File::EntityId &recordId)
324 {
325 LOG(DEBUG, DISASSEMBLER) << "\n[getting record]\nid: " << recordId << " (0x" << std::hex << recordId << ")";
326
327 record.name = GetFullRecordName(recordId);
328
329 LOG(DEBUG, DISASSEMBLER) << "name: " << record.name;
330
331 GetMetaData(&record, recordId);
332
333 if (!file_->IsExternal(recordId)) {
334 GetMethods(recordId);
335 GetFields(record, recordId);
336 }
337 }
338
AddMethodToTables(const panda_file::File::EntityId & methodId)339 void Disassembler::AddMethodToTables(const panda_file::File::EntityId &methodId)
340 {
341 pandasm::Function newMethod("", fileLanguage_);
342 GetMethod(&newMethod, methodId);
343
344 const auto signature = pandasm::GetFunctionSignatureFromName(newMethod.name, newMethod.params);
345 if (prog_.functionTable.find(signature) != prog_.functionTable.end()) {
346 return;
347 }
348
349 methodNameToId_.emplace(signature, methodId);
350 prog_.functionSynonyms[newMethod.name].push_back(signature);
351 prog_.functionTable.emplace(signature, std::move(newMethod));
352 }
353
GetMethod(pandasm::Function * method,const panda_file::File::EntityId & methodId)354 void Disassembler::GetMethod(pandasm::Function *method, const panda_file::File::EntityId &methodId)
355 {
356 LOG(DEBUG, DISASSEMBLER) << "\n[getting method]\nid: " << methodId << " (0x" << std::hex << methodId << ")";
357
358 if (method == nullptr) {
359 LOG(ERROR, DISASSEMBLER) << "> nullptr recieved, but method ptr expected!";
360
361 return;
362 }
363
364 panda_file::MethodDataAccessor methodAccessor(*file_, methodId);
365
366 method->name = GetFullMethodName(methodId);
367
368 LOG(DEBUG, DISASSEMBLER) << "name: " << method->name;
369
370 GetParams(method, methodAccessor.GetProtoId());
371 GetMetaData(method, methodId);
372
373 if (!method->HasImplementation()) {
374 return;
375 }
376
377 if (methodAccessor.GetCodeId().has_value()) {
378 const IdList idList = GetInstructions(method, methodId, methodAccessor.GetCodeId().value());
379
380 for (const auto &id : idList) {
381 AddMethodToTables(id);
382 }
383 } else {
384 LOG(ERROR, DISASSEMBLER) << "> error encountered at " << methodId << " (0x" << std::hex << methodId
385 << "). implementation of method expected, but no \'CODE\' tag was found!";
386
387 return;
388 }
389 }
390
391 template <typename T>
FillLiteralArrayData(pandasm::LiteralArray * litArray,const panda_file::LiteralTag & tag,const panda_file::LiteralDataAccessor::LiteralValue & value) const392 void Disassembler::FillLiteralArrayData(pandasm::LiteralArray *litArray, const panda_file::LiteralTag &tag,
393 const panda_file::LiteralDataAccessor::LiteralValue &value) const
394 {
395 panda_file::File::EntityId id(std::get<uint32_t>(value));
396 auto sp = file_->GetSpanFromId(id);
397 auto len = panda_file::helpers::Read<sizeof(uint32_t)>(&sp);
398 if (tag != panda_file::LiteralTag::ARRAY_STRING) {
399 for (size_t i = 0; i < len; i++) {
400 pandasm::LiteralArray::Literal lit;
401 lit.tag = tag;
402 lit.value = bit_cast<T>(panda_file::helpers::Read<sizeof(T)>(&sp));
403 litArray->literals.push_back(lit);
404 }
405 } else {
406 for (size_t i = 0; i < len; i++) {
407 auto strId = panda_file::helpers::Read<sizeof(T)>(&sp);
408 pandasm::LiteralArray::Literal lit;
409 lit.tag = tag;
410 lit.value = StringDataToString(file_->GetStringData(panda_file::File::EntityId(strId)));
411 litArray->literals.push_back(lit);
412 }
413 }
414 }
415
FillLiteralData(pandasm::LiteralArray * litArray,const panda_file::LiteralDataAccessor::LiteralValue & value,const panda_file::LiteralTag & tag) const416 void Disassembler::FillLiteralData(pandasm::LiteralArray *litArray,
417 const panda_file::LiteralDataAccessor::LiteralValue &value,
418 const panda_file::LiteralTag &tag) const
419 {
420 pandasm::LiteralArray::Literal lit;
421 if (tag == panda_file::LiteralTag::TAGVALUE) {
422 return;
423 }
424 lit.tag = tag;
425 lit.value = ParseLiteralValue(value, tag);
426 litArray->literals.push_back(lit);
427 }
428
ParseLiteralValue(const panda_file::LiteralDataAccessor::LiteralValue & value,const panda_file::LiteralTag & tag) const429 std::variant<bool, uint8_t, uint16_t, uint32_t, uint64_t, float, double, std::string> Disassembler::ParseLiteralValue(
430 const panda_file::LiteralDataAccessor::LiteralValue &value, const panda_file::LiteralTag &tag) const
431 {
432 switch (tag) {
433 case panda_file::LiteralTag::BOOL:
434 return std::get<bool>(value);
435 case panda_file::LiteralTag::ACCESSOR:
436 case panda_file::LiteralTag::NULLVALUE:
437 return std::get<uint8_t>(value);
438 case panda_file::LiteralTag::METHODAFFILIATE:
439 return std::get<uint16_t>(value);
440 case panda_file::LiteralTag::INTEGER:
441 return std::get<uint32_t>(value);
442 case panda_file::LiteralTag::BIGINT:
443 return std::get<uint64_t>(value);
444 case panda_file::LiteralTag::FLOAT:
445 return std::get<float>(value);
446 case panda_file::LiteralTag::DOUBLE:
447 return std::get<double>(value);
448 case panda_file::LiteralTag::STRING:
449 case panda_file::LiteralTag::METHOD:
450 case panda_file::LiteralTag::GENERATORMETHOD:
451 return ParseStringData(value);
452 case panda_file::LiteralTag::LITERALARRAY:
453 return ParseLiteralArrayData(value);
454 default:
455 LOG(ERROR, DISASSEMBLER) << "Unsupported literal with tag 0x" << std::hex << static_cast<uint32_t>(tag);
456 UNREACHABLE();
457 }
458 }
459
ParseStringData(const panda_file::LiteralDataAccessor::LiteralValue & value) const460 std::string Disassembler::ParseStringData(const panda_file::LiteralDataAccessor::LiteralValue &value) const
461 {
462 auto strData = file_->GetStringData(panda_file::File::EntityId(std::get<uint32_t>(value)));
463 return StringDataToString(strData);
464 }
465
ParseLiteralArrayData(const panda_file::LiteralDataAccessor::LiteralValue & value) const466 std::string Disassembler::ParseLiteralArrayData(const panda_file::LiteralDataAccessor::LiteralValue &value) const
467 {
468 std::stringstream ss;
469 ss << "0x" << std::hex << std::get<uint32_t>(value);
470 return ss.str();
471 }
472
GetLiteralArrayByOffset(pandasm::LiteralArray * litArray,panda_file::File::EntityId offset) const473 void Disassembler::GetLiteralArrayByOffset(pandasm::LiteralArray *litArray, panda_file::File::EntityId offset) const
474 {
475 panda_file::LiteralDataAccessor litArrayAccessor(*file_, file_->GetLiteralArraysId());
476 auto processLiteralValue = [this, litArray](const panda_file::LiteralDataAccessor::LiteralValue &value,
477 const panda_file::LiteralTag &tag) {
478 switch (tag) {
479 case panda_file::LiteralTag::ARRAY_U1: {
480 FillLiteralArrayData<bool>(litArray, tag, value);
481 break;
482 }
483 case panda_file::LiteralTag::ARRAY_I8:
484 case panda_file::LiteralTag::ARRAY_U8: {
485 FillLiteralArrayData<uint8_t>(litArray, tag, value);
486 break;
487 }
488 case panda_file::LiteralTag::ARRAY_I16:
489 case panda_file::LiteralTag::ARRAY_U16: {
490 FillLiteralArrayData<uint16_t>(litArray, tag, value);
491 break;
492 }
493 case panda_file::LiteralTag::ARRAY_I32:
494 case panda_file::LiteralTag::ARRAY_U32: {
495 FillLiteralArrayData<uint32_t>(litArray, tag, value);
496 break;
497 }
498 case panda_file::LiteralTag::ARRAY_I64:
499 case panda_file::LiteralTag::ARRAY_U64: {
500 FillLiteralArrayData<uint64_t>(litArray, tag, value);
501 break;
502 }
503 case panda_file::LiteralTag::ARRAY_F32: {
504 FillLiteralArrayData<float>(litArray, tag, value);
505 break;
506 }
507 case panda_file::LiteralTag::ARRAY_F64: {
508 FillLiteralArrayData<double>(litArray, tag, value);
509 break;
510 }
511 case panda_file::LiteralTag::ARRAY_STRING: {
512 FillLiteralArrayData<uint32_t>(litArray, tag, value);
513 break;
514 }
515 default: {
516 FillLiteralData(litArray, value, tag);
517 break;
518 }
519 }
520 };
521
522 litArrayAccessor.EnumerateLiteralVals(offset, processLiteralValue);
523 }
524
GetLiteralArray(pandasm::LiteralArray * litArray,const size_t index)525 void Disassembler::GetLiteralArray(pandasm::LiteralArray *litArray, const size_t index)
526 {
527 LOG(DEBUG, DISASSEMBLER) << "\n[getting literal array]\nindex: " << index;
528
529 panda_file::LiteralDataAccessor litArrayAccessor(*file_, file_->GetLiteralArraysId());
530 GetLiteralArrayByOffset(litArray, litArrayAccessor.GetLiteralArrayId(index));
531 }
532
GetLiteralArrays()533 void Disassembler::GetLiteralArrays()
534 {
535 const auto litArraysId = file_->GetLiteralArraysId();
536
537 LOG(DEBUG, DISASSEMBLER) << "\n[getting literal arrays]\nid: " << litArraysId << " (0x" << std::hex << litArraysId
538 << ")";
539
540 panda_file::LiteralDataAccessor litArrayAccessor(*file_, litArraysId);
541 size_t numLitarrays = litArrayAccessor.GetLiteralNum();
542 for (size_t index = 0; index < numLitarrays; index++) {
543 ark::pandasm::LiteralArray litAr;
544 GetLiteralArray(&litAr, index);
545 prog_.literalarrayTable.emplace(std::to_string(index), litAr);
546 }
547 }
548
GetRecords()549 void Disassembler::GetRecords()
550 {
551 LOG(DEBUG, DISASSEMBLER) << "\n[getting records]\n";
552
553 const auto classIdx = file_->GetClasses();
554
555 for (size_t i = 0; i < classIdx.size(); i++) {
556 uint32_t classId = classIdx[i];
557 auto classOff = file_->GetHeader()->classIdxOff + sizeof(uint32_t) * i;
558
559 if (classId > file_->GetHeader()->fileSize) {
560 LOG(ERROR, DISASSEMBLER) << "> error encountered in record at " << classOff << " (0x" << std::hex
561 << classOff << "). binary file corrupted. record offset (0x" << classId
562 << ") out of bounds (0x" << file_->GetHeader()->fileSize << ")!";
563 break;
564 }
565
566 const panda_file::File::EntityId recordId {classId};
567 auto language = GetRecordLanguage(recordId);
568 if (language != fileLanguage_) {
569 if (fileLanguage_ == panda_file::SourceLang::PANDA_ASSEMBLY) {
570 fileLanguage_ = language;
571 } else if (language != panda_file::SourceLang::PANDA_ASSEMBLY) {
572 LOG(ERROR, DISASSEMBLER) << "> possible error encountered in record at" << classOff << " (0x"
573 << std::hex << classOff << "). record's language ("
574 << panda_file::LanguageToString(language)
575 << ") differs from file's language ("
576 << panda_file::LanguageToString(fileLanguage_) << ")!";
577 }
578 }
579
580 pandasm::Record record("", fileLanguage_);
581 GetRecord(record, recordId);
582
583 if (prog_.recordTable.find(record.name) == prog_.recordTable.end()) {
584 recordNameToId_.emplace(record.name, recordId);
585 prog_.recordTable.emplace(record.name, std::move(record));
586 }
587 }
588 }
589
GetField(pandasm::Field & field,const panda_file::FieldDataAccessor & fieldAccessor)590 void Disassembler::GetField(pandasm::Field &field, const panda_file::FieldDataAccessor &fieldAccessor)
591 {
592 panda_file::File::EntityId fieldNameId = fieldAccessor.GetNameId();
593 field.name = StringDataToString(file_->GetStringData(fieldNameId));
594
595 uint32_t fieldType = fieldAccessor.GetType();
596 field.type = FieldTypeToPandasmType(fieldType);
597
598 GetMetaData(&field, fieldAccessor.GetFieldId());
599 }
600
GetFields(pandasm::Record & record,const panda_file::File::EntityId & recordId)601 void Disassembler::GetFields(pandasm::Record &record, const panda_file::File::EntityId &recordId)
602 {
603 panda_file::ClassDataAccessor classAccessor {*file_, recordId};
604
605 classAccessor.EnumerateFields([&](panda_file::FieldDataAccessor &fieldAccessor) -> void {
606 pandasm::Field field(fileLanguage_);
607
608 GetField(field, fieldAccessor);
609
610 record.fieldList.push_back(std::move(field));
611 });
612 }
613
AddExternalFieldsToRecords()614 void Disassembler::AddExternalFieldsToRecords()
615 {
616 for (auto &[recordName, record] : prog_.recordTable) {
617 auto iter = externalFieldTable_.find(recordName);
618 if (iter == externalFieldTable_.end() || iter->second.empty()) {
619 continue;
620 }
621 for (auto &fieldIter : iter->second) {
622 record.fieldList.push_back(std::move(fieldIter));
623 }
624 externalFieldTable_.erase(recordName);
625 }
626 }
627
AddExternalFieldsInfoToRecords()628 void Disassembler::AddExternalFieldsInfoToRecords()
629 {
630 for (auto &[recordName, recordInfo] : progInfo_.recordsInfo) {
631 auto iter = externalFieldsInfoTable_.find(recordName);
632 if (iter == externalFieldsInfoTable_.end() || iter->second.empty()) {
633 continue;
634 }
635 for (auto &info : iter->second) {
636 recordInfo.fieldsInfo.push_back(std::move(info));
637 }
638 externalFieldsInfoTable_.erase(recordName);
639 }
640 }
641
GetMethods(const panda_file::File::EntityId & recordId)642 void Disassembler::GetMethods(const panda_file::File::EntityId &recordId)
643 {
644 panda_file::ClassDataAccessor classAccessor {*file_, recordId};
645
646 classAccessor.EnumerateMethods([&](panda_file::MethodDataAccessor &methodAccessor) -> void {
647 AddMethodToTables(methodAccessor.GetMethodId());
648 });
649 }
650
GetParams(pandasm::Function * method,const panda_file::File::EntityId & protoId) const651 void Disassembler::GetParams(pandasm::Function *method, const panda_file::File::EntityId &protoId) const
652 {
653 /// frame size - 2^16 - 1
654 static const uint32_t MAX_ARG_NUM = 0xFFFF;
655
656 LOG(DEBUG, DISASSEMBLER) << "[getting params]\nproto id: " << protoId << " (0x" << std::hex << protoId << ")";
657
658 if (method == nullptr) {
659 LOG(ERROR, DISASSEMBLER) << "> nullptr recieved, but method ptr expected!";
660
661 return;
662 }
663
664 panda_file::ProtoDataAccessor protoAccessor(*file_, protoId);
665
666 if (protoAccessor.GetNumArgs() > MAX_ARG_NUM) {
667 LOG(ERROR, DISASSEMBLER) << "> error encountered at " << protoId << " (0x" << std::hex << protoId
668 << "). number of function's arguments (" << std::dec << protoAccessor.GetNumArgs()
669 << ") exceeds MAX_ARG_NUM (" << MAX_ARG_NUM << ") !";
670
671 return;
672 }
673
674 size_t refIdx = 0;
675 method->returnType = PFTypeToPandasmType(protoAccessor.GetReturnType(), protoAccessor, refIdx);
676
677 for (size_t i = 0; i < protoAccessor.GetNumArgs(); i++) {
678 auto argType = PFTypeToPandasmType(protoAccessor.GetArgType(i), protoAccessor, refIdx);
679 method->params.emplace_back(argType, fileLanguage_);
680 }
681 }
682
GetExceptions(pandasm::Function * method,panda_file::File::EntityId methodId,panda_file::File::EntityId codeId) const683 LabelTable Disassembler::GetExceptions(pandasm::Function *method, panda_file::File::EntityId methodId,
684 panda_file::File::EntityId codeId) const
685 {
686 LOG(DEBUG, DISASSEMBLER) << "[getting exceptions]\ncode id: " << codeId << " (0x" << std::hex << codeId << ")";
687
688 if (method == nullptr) {
689 LOG(ERROR, DISASSEMBLER) << "> nullptr recieved, but method ptr expected!\n";
690 return LabelTable {};
691 }
692
693 panda_file::CodeDataAccessor codeAccessor(*file_, codeId);
694
695 const auto bcIns = BytecodeInstruction(codeAccessor.GetInstructions());
696 const auto bcInsLast = bcIns.JumpTo(codeAccessor.GetCodeSize());
697
698 size_t tryIdx = 0;
699 LabelTable labelTable {};
700 codeAccessor.EnumerateTryBlocks([&](panda_file::CodeDataAccessor::TryBlock &tryBlock) {
701 pandasm::Function::CatchBlock catchBlockPa {};
702 if (!LocateTryBlock(bcIns, bcInsLast, tryBlock, &catchBlockPa, &labelTable, tryIdx)) {
703 return false;
704 }
705 size_t catchIdx = 0;
706 tryBlock.EnumerateCatchBlocks([&](panda_file::CodeDataAccessor::CatchBlock &catchBlock) {
707 auto classIdx = catchBlock.GetTypeIdx();
708 if (classIdx == panda_file::INVALID_INDEX) {
709 catchBlockPa.exceptionRecord = "";
710 } else {
711 const auto classId = file_->ResolveClassIndex(methodId, classIdx);
712 catchBlockPa.exceptionRecord = GetFullRecordName(classId);
713 }
714 if (!LocateCatchBlock(bcIns, bcInsLast, catchBlock, &catchBlockPa, &labelTable, tryIdx, catchIdx)) {
715 return false;
716 }
717
718 method->catchBlocks.push_back(catchBlockPa);
719 catchBlockPa.catchBeginLabel = "";
720 catchBlockPa.catchEndLabel = "";
721 catchIdx++;
722
723 return true;
724 });
725 tryIdx++;
726
727 return true;
728 });
729
730 return labelTable;
731 }
732
GetBytecodeInstructionNumber(BytecodeInstruction bcInsFirst,BytecodeInstruction bcInsCur)733 static size_t GetBytecodeInstructionNumber(BytecodeInstruction bcInsFirst, BytecodeInstruction bcInsCur)
734 {
735 size_t count = 0;
736
737 while (bcInsFirst.GetAddress() != bcInsCur.GetAddress()) {
738 count++;
739 bcInsFirst = bcInsFirst.GetNext();
740 if (bcInsFirst.GetAddress() > bcInsCur.GetAddress()) {
741 return std::numeric_limits<size_t>::max();
742 }
743 }
744
745 return count;
746 }
747
748 // CC-OFFNXT(G.FUN.01) solid logic
LocateTryBlock(const BytecodeInstruction & bcIns,const BytecodeInstruction & bcInsLast,const panda_file::CodeDataAccessor::TryBlock & tryBlock,pandasm::Function::CatchBlock * catchBlockPa,LabelTable * labelTable,size_t tryIdx) const749 bool Disassembler::LocateTryBlock(const BytecodeInstruction &bcIns, const BytecodeInstruction &bcInsLast,
750 const panda_file::CodeDataAccessor::TryBlock &tryBlock,
751 pandasm::Function::CatchBlock *catchBlockPa, LabelTable *labelTable,
752 size_t tryIdx) const
753 {
754 const auto tryBeginBcIns = bcIns.JumpTo(tryBlock.GetStartPc());
755 const auto tryEndBcIns = bcIns.JumpTo(tryBlock.GetStartPc() + tryBlock.GetLength());
756
757 const size_t tryBeginIdx = GetBytecodeInstructionNumber(bcIns, tryBeginBcIns);
758 const size_t tryEndIdx = GetBytecodeInstructionNumber(bcIns, tryEndBcIns);
759
760 const bool tryBeginOffsetInRange = bcInsLast.GetAddress() > tryBeginBcIns.GetAddress();
761 const bool tryEndOffsetInRange = bcInsLast.GetAddress() >= tryEndBcIns.GetAddress();
762 const bool tryBeginOffsetValid = tryBeginIdx != std::numeric_limits<size_t>::max();
763 const bool tryEndOffsetValid = tryEndIdx != std::numeric_limits<size_t>::max();
764
765 if (!tryBeginOffsetInRange || !tryBeginOffsetValid) {
766 LOG(ERROR, DISASSEMBLER) << "> invalid try block begin offset! address is: 0x" << std::hex
767 << tryBeginBcIns.GetAddress();
768 return false;
769 }
770
771 auto itBegin = labelTable->find(tryBeginIdx);
772 if (itBegin == labelTable->end()) {
773 std::stringstream ss {};
774 ss << "try_begin_label_" << tryIdx;
775 catchBlockPa->tryBeginLabel = ss.str();
776 labelTable->insert(std::pair<size_t, std::string>(tryBeginIdx, ss.str()));
777 } else {
778 catchBlockPa->tryBeginLabel = itBegin->second;
779 }
780
781 if (!tryEndOffsetInRange || !tryEndOffsetValid) {
782 LOG(ERROR, DISASSEMBLER) << "> invalid try block end offset! address is: 0x" << std::hex
783 << tryEndBcIns.GetAddress();
784 return false;
785 }
786
787 auto itEnd = labelTable->find(tryEndIdx);
788 if (itEnd == labelTable->end()) {
789 std::stringstream ss {};
790 ss << "try_end_label_" << tryIdx;
791 catchBlockPa->tryEndLabel = ss.str();
792 labelTable->insert(std::pair<size_t, std::string>(tryEndIdx, ss.str()));
793 } else {
794 catchBlockPa->tryEndLabel = itEnd->second;
795 }
796
797 return true;
798 }
799
LocateCatchBlock(const BytecodeInstruction & bcIns,const BytecodeInstruction & bcInsLast,const panda_file::CodeDataAccessor::CatchBlock & catchBlock,pandasm::Function::CatchBlock * catchBlockPa,LabelTable * labelTable,size_t tryIdx,size_t catchIdx) const800 bool Disassembler::LocateCatchBlock(const BytecodeInstruction &bcIns, const BytecodeInstruction &bcInsLast,
801 const panda_file::CodeDataAccessor::CatchBlock &catchBlock,
802 pandasm::Function::CatchBlock *catchBlockPa, LabelTable *labelTable, size_t tryIdx,
803 size_t catchIdx) const
804 {
805 const auto handlerBeginOffset = catchBlock.GetHandlerPc();
806 const auto handlerEndOffset = handlerBeginOffset + catchBlock.GetCodeSize();
807
808 const auto handlerBeginBcIns = bcIns.JumpTo(handlerBeginOffset);
809 const auto handlerEndBcIns = bcIns.JumpTo(handlerEndOffset);
810
811 const size_t handlerBeginIdx = GetBytecodeInstructionNumber(bcIns, handlerBeginBcIns);
812 const size_t handlerEndIdx = GetBytecodeInstructionNumber(bcIns, handlerEndBcIns);
813
814 const bool handlerBeginOffsetInRange = bcInsLast.GetAddress() > handlerBeginBcIns.GetAddress();
815 const bool handlerEndOffsetInRange = bcInsLast.GetAddress() > handlerEndBcIns.GetAddress();
816 const bool handlerEndPresent = catchBlock.GetCodeSize() != 0;
817 const bool handlerBeginOffsetValid = handlerBeginIdx != std::numeric_limits<size_t>::max();
818 const bool handlerEndOffsetValid = handlerEndIdx != std::numeric_limits<size_t>::max();
819
820 if (!handlerBeginOffsetInRange || !handlerBeginOffsetValid) {
821 LOG(ERROR, DISASSEMBLER) << "> invalid catch block begin offset! address is: 0x" << std::hex
822 << handlerBeginBcIns.GetAddress();
823 return false;
824 }
825
826 auto itBegin = labelTable->find(handlerBeginIdx);
827 if (itBegin == labelTable->end()) {
828 std::stringstream ss {};
829 ss << "handler_begin_label_" << tryIdx << "_" << catchIdx;
830 catchBlockPa->catchBeginLabel = ss.str();
831 labelTable->insert(std::pair<size_t, std::string>(handlerBeginIdx, ss.str()));
832 } else {
833 catchBlockPa->catchBeginLabel = itBegin->second;
834 }
835
836 if (!handlerEndOffsetInRange || !handlerEndOffsetValid) {
837 LOG(ERROR, DISASSEMBLER) << "> invalid catch block end offset! address is: 0x" << std::hex
838 << handlerEndBcIns.GetAddress();
839 return false;
840 }
841
842 if (handlerEndPresent) {
843 auto itEnd = labelTable->find(handlerEndIdx);
844 if (itEnd == labelTable->end()) {
845 std::stringstream ss {};
846 ss << "handler_end_label_" << tryIdx << "_" << catchIdx;
847 catchBlockPa->catchEndLabel = ss.str();
848 labelTable->insert(std::pair<size_t, std::string>(handlerEndIdx, ss.str()));
849 } else {
850 catchBlockPa->catchEndLabel = itEnd->second;
851 }
852 }
853
854 return true;
855 }
856
857 template <typename T>
SetEntityAttribute(T * entity,const std::function<bool ()> & shouldSet,std::string_view attribute)858 static void SetEntityAttribute(T *entity, const std::function<bool()> &shouldSet, std::string_view attribute)
859 {
860 if (shouldSet()) {
861 auto err = entity->metadata->SetAttribute(attribute);
862 if (err.has_value()) {
863 LOG(ERROR, DISASSEMBLER) << err.value().GetMessage();
864 }
865 }
866 }
867
868 template <typename T>
SetEntityAttributeValue(T * entity,const std::function<bool ()> & shouldSet,std::string_view attribute,const char * value)869 static void SetEntityAttributeValue(T *entity, const std::function<bool()> &shouldSet, std::string_view attribute,
870 const char *value)
871 {
872 if (shouldSet()) {
873 auto err = entity->metadata->SetAttributeValue(attribute, value);
874 if (err.has_value()) {
875 LOG(ERROR, DISASSEMBLER) << err.value().GetMessage();
876 }
877 }
878 }
879
GetMetaData(pandasm::Function * method,const panda_file::File::EntityId & methodId) const880 void Disassembler::GetMetaData(pandasm::Function *method, const panda_file::File::EntityId &methodId) const
881 {
882 LOG(DEBUG, DISASSEMBLER) << "[getting metadata]\nmethod id: " << methodId << " (0x" << std::hex << methodId << ")";
883
884 if (method == nullptr) {
885 LOG(ERROR, DISASSEMBLER) << "> nullptr recieved, but method ptr expected!";
886
887 return;
888 }
889
890 panda_file::MethodDataAccessor methodAccessor(*file_, methodId);
891
892 const auto methodNameRaw = StringDataToString(file_->GetStringData(methodAccessor.GetNameId()));
893
894 if (!methodAccessor.IsStatic()) {
895 const auto className = StringDataToString(file_->GetStringData(methodAccessor.GetClassId()));
896 auto thisType = pandasm::Type::FromDescriptor(className);
897
898 LOG(DEBUG, DISASSEMBLER) << "method (raw: \'" << methodNameRaw
899 << "\') is not static. emplacing self-argument of type " << thisType.GetName();
900
901 method->params.insert(method->params.begin(), pandasm::Function::Parameter(thisType, fileLanguage_));
902 }
903 SetEntityAttribute(
904 method, [&methodAccessor]() { return methodAccessor.IsStatic(); }, "static");
905
906 SetEntityAttribute(
907 method, [this, &methodAccessor]() { return file_->IsExternal(methodAccessor.GetMethodId()); }, "external");
908
909 SetEntityAttribute(
910 method, [&methodAccessor]() { return methodAccessor.IsNative(); }, "native");
911
912 SetEntityAttribute(
913 method, [&methodAccessor]() { return methodAccessor.IsAbstract(); }, "noimpl");
914
915 SetEntityAttribute(
916 method, [&methodAccessor]() { return methodAccessor.IsVarArgs(); }, "varargs");
917
918 SetEntityAttributeValue(
919 method, [&methodAccessor]() { return methodAccessor.IsPublic(); }, "access.function", "public");
920
921 SetEntityAttributeValue(
922 method, [&methodAccessor]() { return methodAccessor.IsProtected(); }, "access.function", "protected");
923
924 SetEntityAttributeValue(
925 method, [&methodAccessor]() { return methodAccessor.IsPrivate(); }, "access.function", "private");
926
927 SetEntityAttribute(
928 method, [&methodAccessor]() { return methodAccessor.IsFinal(); }, "final");
929
930 std::string ctorName = ark::panda_file::GetCtorName(fileLanguage_);
931 std::string cctorName = ark::panda_file::GetCctorName(fileLanguage_);
932
933 const bool isCtor = (methodNameRaw == ctorName);
934 const bool isCctor = (methodNameRaw == cctorName);
935
936 if (isCtor) {
937 method->metadata->SetAttribute("ctor");
938 method->name.replace(method->name.find(ctorName), ctorName.length(), "_ctor_");
939 } else if (isCctor) {
940 method->metadata->SetAttribute("cctor");
941 method->name.replace(method->name.find(cctorName), cctorName.length(), "_cctor_");
942 }
943 }
944
GetMetaData(pandasm::Record * record,const panda_file::File::EntityId & recordId) const945 void Disassembler::GetMetaData(pandasm::Record *record, const panda_file::File::EntityId &recordId) const
946 {
947 LOG(DEBUG, DISASSEMBLER) << "[getting metadata]\nrecord id: " << recordId << " (0x" << std::hex << recordId << ")";
948
949 if (record == nullptr) {
950 LOG(ERROR, DISASSEMBLER) << "> nullptr recieved, but record ptr expected!";
951
952 return;
953 }
954
955 SetEntityAttribute(
956 record, [this, recordId]() { return file_->IsExternal(recordId); }, "external");
957
958 auto external = file_->IsExternal(recordId);
959 if (!external) {
960 auto cda = panda_file::ClassDataAccessor {*file_, recordId};
961 SetEntityAttributeValue(
962 record, [&cda]() { return cda.IsPublic(); }, "access.record", "public");
963
964 SetEntityAttributeValue(
965 record, [&cda]() { return cda.IsProtected(); }, "access.record", "protected");
966
967 SetEntityAttributeValue(
968 record, [&cda]() { return cda.IsPrivate(); }, "access.record", "private");
969
970 SetEntityAttribute(
971 record, [&cda]() { return cda.IsFinal(); }, "final");
972 }
973 }
974
975 template <typename T, pandasm::Value::Type VALUE_TYPE>
SetMetadata(panda_file::FieldDataAccessor & accessor,pandasm::Field * field) const976 void Disassembler::SetMetadata(panda_file::FieldDataAccessor &accessor, pandasm::Field *field) const
977 {
978 std::optional<T> val = accessor.GetValue<T>();
979 if (val.has_value()) {
980 field->metadata->SetValue(pandasm::ScalarValue::Create<VALUE_TYPE>(val.value()));
981 }
982 }
983
GetMetadataFieldValue(panda_file::FieldDataAccessor & fieldAccessor,pandasm::Field * field) const984 void Disassembler::GetMetadataFieldValue(panda_file::FieldDataAccessor &fieldAccessor, pandasm::Field *field) const
985 {
986 static const std::unordered_map<panda_file::Type::TypeId,
987 std::function<void(panda_file::FieldDataAccessor &, pandasm::Field *)>>
988 HANDLERS = {
989 {panda_file::Type::TypeId::U1,
990 [this](auto &accessor, auto *f) { SetMetadata<bool, pandasm::Value::Type::U1>(accessor, f); }},
991 {panda_file::Type::TypeId::U8,
992 [this](auto &accessor, auto *f) { SetMetadata<uint8_t, pandasm::Value::Type::U8>(accessor, f); }},
993 {panda_file::Type::TypeId::U16,
994 [this](auto &accessor, auto *f) { SetMetadata<uint16_t, pandasm::Value::Type::U16>(accessor, f); }},
995 {panda_file::Type::TypeId::U32,
996 [this](auto &accessor, auto *f) { SetMetadata<uint32_t, pandasm::Value::Type::U32>(accessor, f); }},
997 {panda_file::Type::TypeId::F64,
998 [this](auto &accessor, auto *f) { SetMetadata<double, pandasm::Value::Type::F64>(accessor, f); }},
999 {panda_file::Type::TypeId::I8,
1000 [this](auto &accessor, auto *f) { SetMetadata<int8_t, pandasm::Value::Type::I8>(accessor, f); }},
1001 {panda_file::Type::TypeId::I16,
1002 [this](auto &accessor, auto *f) { SetMetadata<int16_t, pandasm::Value::Type::I16>(accessor, f); }},
1003 {panda_file::Type::TypeId::I32,
1004 [this](auto &accessor, auto *f) { SetMetadata<int32_t, pandasm::Value::Type::I32>(accessor, f); }},
1005 {panda_file::Type::TypeId::I64,
1006 [this](auto &accessor, auto *f) { SetMetadata<int64_t, pandasm::Value::Type::I64>(accessor, f); }},
1007 };
1008
1009 auto it = HANDLERS.find(field->type.GetId());
1010 if (it != HANDLERS.end()) {
1011 it->second(fieldAccessor, field);
1012 } else if (field->type.GetId() == panda_file::Type::TypeId::REFERENCE &&
1013 field->type.GetName() == "std/core/String") {
1014 std::optional<uint32_t> stringOffsetVal = fieldAccessor.GetValue<uint32_t>();
1015 if (stringOffsetVal.has_value()) {
1016 std::string_view val {reinterpret_cast<const char *>(
1017 file_->GetStringData(panda_file::File::EntityId(stringOffsetVal.value())).data)};
1018 field->metadata->SetValue(pandasm::ScalarValue::Create<pandasm::Value::Type::STRING>(val));
1019 }
1020 } else if (field->type.GetRank() > 0) {
1021 std::optional<uint32_t> litarrayOffsetVal = fieldAccessor.GetValue<uint32_t>();
1022 if (litarrayOffsetVal.has_value()) {
1023 field->metadata->SetValue(pandasm::ScalarValue::Create<pandasm::Value::Type::LITERALARRAY>(
1024 std::string_view {std::to_string(litarrayOffsetVal.value())}));
1025 }
1026 }
1027 }
1028
GetMetaData(pandasm::Field * field,const panda_file::File::EntityId & fieldId) const1029 void Disassembler::GetMetaData(pandasm::Field *field, const panda_file::File::EntityId &fieldId) const
1030 {
1031 LOG(DEBUG, DISASSEMBLER) << "[getting metadata]\nfield id: " << fieldId << " (0x" << std::hex << fieldId << ")";
1032
1033 if (field == nullptr) {
1034 LOG(ERROR, DISASSEMBLER) << "> nullptr recieved, but method ptr expected!";
1035
1036 return;
1037 }
1038
1039 panda_file::FieldDataAccessor fieldAccessor(*file_, fieldId);
1040
1041 SetEntityAttribute(
1042 field, [&fieldAccessor]() { return fieldAccessor.IsExternal(); }, "external");
1043
1044 SetEntityAttribute(
1045 field, [&fieldAccessor]() { return fieldAccessor.IsStatic(); }, "static");
1046
1047 SetEntityAttributeValue(
1048 field, [&fieldAccessor]() { return fieldAccessor.IsPublic(); }, "access.field", "public");
1049
1050 SetEntityAttributeValue(
1051 field, [&fieldAccessor]() { return fieldAccessor.IsProtected(); }, "access.field", "protected");
1052
1053 SetEntityAttributeValue(
1054 field, [&fieldAccessor]() { return fieldAccessor.IsPrivate(); }, "access.field", "private");
1055
1056 SetEntityAttribute(
1057 field, [&fieldAccessor]() { return fieldAccessor.IsFinal(); }, "final");
1058 GetMetadataFieldValue(fieldAccessor, field);
1059 }
1060
AnnotationTagToString(const char tag) const1061 std::string Disassembler::AnnotationTagToString(const char tag) const
1062 {
1063 static const std::unordered_map<char, std::string> TAG_TO_STRING = {{'1', "u1"},
1064 {'2', "i8"},
1065 {'3', "u8"},
1066 {'4', "i16"},
1067 {'5', "u16"},
1068 {'6', "i32"},
1069 {'7', "u32"},
1070 {'8', "i64"},
1071 {'9', "u64"},
1072 {'A', "f32"},
1073 {'B', "f64"},
1074 {'C', "string"},
1075 {'D', "record"},
1076 {'E', "method"},
1077 {'F', "enum"},
1078 {'G', "annotation"},
1079 {'J', "method_handle"},
1080 {'H', "array"},
1081 {'K', "u1[]"},
1082 {'L', "i8[]"},
1083 {'M', "u8[]"},
1084 {'N', "i16[]"},
1085 {'O', "u16[]"},
1086 {'P', "i32[]"},
1087 {'Q', "u32[]"},
1088 {'R', "i64[]"},
1089 {'S', "u64[]"},
1090 {'T', "f32[]"},
1091 {'U', "f64[]"},
1092 {'V', "string[]"},
1093 {'W', "record[]"},
1094 {'X', "method[]"},
1095 {'Y', "enum[]"},
1096 {'Z', "annotation[]"},
1097 {'@', "method_handle[]"},
1098 {'*', "nullptr_string"}};
1099
1100 return TAG_TO_STRING.at(tag);
1101 }
1102
ScalarValueToString(const panda_file::ScalarValue & value,const std::string & type)1103 std::string Disassembler::ScalarValueToString(const panda_file::ScalarValue &value, const std::string &type)
1104 {
1105 std::stringstream ss;
1106
1107 if (type == "i8") {
1108 auto res = value.Get<int8_t>();
1109 ss << static_cast<int>(res);
1110 } else if (type == "u1" || type == "u8") {
1111 auto res = value.Get<uint8_t>();
1112 ss << static_cast<unsigned int>(res);
1113 } else if (type == "i16") {
1114 ss << value.Get<int16_t>();
1115 } else if (type == "u16") {
1116 ss << value.Get<uint16_t>();
1117 } else if (type == "i32") {
1118 ss << value.Get<int32_t>();
1119 } else if (type == "u32") {
1120 ss << value.Get<uint32_t>();
1121 } else if (type == "i64") {
1122 ss << value.Get<int64_t>();
1123 } else if (type == "u64") {
1124 ss << value.Get<uint64_t>();
1125 } else if (type == "f32") {
1126 ss << value.Get<float>();
1127 } else if (type == "f64") {
1128 ss << value.Get<double>();
1129 } else if (type == "string") {
1130 const auto id = value.Get<panda_file::File::EntityId>();
1131 ss << "\"" << StringDataToString(file_->GetStringData(id)) << "\"";
1132 } else if (type == "record") {
1133 const auto id = value.Get<panda_file::File::EntityId>();
1134 ss << GetFullRecordName(id);
1135 } else if (type == "method") {
1136 const auto id = value.Get<panda_file::File::EntityId>();
1137 AddMethodToTables(id);
1138 ss << GetMethodSignature(id);
1139 } else if (type == "enum") {
1140 const auto id = value.Get<panda_file::File::EntityId>();
1141 panda_file::FieldDataAccessor fieldAccessor(*file_, id);
1142 ss << GetFullRecordName(fieldAccessor.GetClassId()) << "."
1143 << StringDataToString(file_->GetStringData(fieldAccessor.GetNameId()));
1144 } else if (type == "annotation") {
1145 const auto id = value.Get<panda_file::File::EntityId>();
1146 ss << "id_" << id;
1147 } else if (type == "void") {
1148 return std::string();
1149 } else if (type == "method_handle") {
1150 } else if (type == "nullptr_string") {
1151 ss << static_cast<uint32_t>(0);
1152 }
1153
1154 return ss.str();
1155 }
1156
ArrayValueToString(const panda_file::ArrayValue & value,const std::string & type,const size_t idx)1157 std::string Disassembler::ArrayValueToString(const panda_file::ArrayValue &value, const std::string &type,
1158 const size_t idx)
1159 {
1160 std::stringstream ss;
1161
1162 if (type == "i8") {
1163 auto res = value.Get<int8_t>(idx);
1164 ss << static_cast<int>(res);
1165 } else if (type == "u1" || type == "u8") {
1166 auto res = value.Get<uint8_t>(idx);
1167 ss << static_cast<unsigned int>(res);
1168 } else if (type == "i16") {
1169 ss << (value.Get<int16_t>(idx));
1170 } else if (type == "u16") {
1171 ss << (value.Get<uint16_t>(idx));
1172 } else if (type == "i32") {
1173 ss << (value.Get<int32_t>(idx));
1174 } else if (type == "u32") {
1175 ss << (value.Get<uint32_t>(idx));
1176 } else if (type == "i64") {
1177 ss << (value.Get<int64_t>(idx));
1178 } else if (type == "u64") {
1179 ss << (value.Get<uint64_t>(idx));
1180 } else if (type == "f32") {
1181 ss << value.Get<float>(idx);
1182 } else if (type == "f64") {
1183 ss << value.Get<double>(idx);
1184 } else if (type == "string") {
1185 const auto id = value.Get<panda_file::File::EntityId>(idx);
1186 ss << '\"' << StringDataToString(file_->GetStringData(id)) << '\"';
1187 } else if (type == "record") {
1188 const auto id = value.Get<panda_file::File::EntityId>(idx);
1189 ss << GetFullRecordName(id);
1190 } else if (type == "method") {
1191 const auto id = value.Get<panda_file::File::EntityId>(idx);
1192 AddMethodToTables(id);
1193 ss << GetMethodSignature(id);
1194 } else if (type == "enum") {
1195 const auto id = value.Get<panda_file::File::EntityId>(idx);
1196 panda_file::FieldDataAccessor fieldAccessor(*file_, id);
1197 ss << GetFullRecordName(fieldAccessor.GetClassId()) << "."
1198 << StringDataToString(file_->GetStringData(fieldAccessor.GetNameId()));
1199 } else if (type == "annotation") {
1200 const auto id = value.Get<panda_file::File::EntityId>(idx);
1201 ss << "id_" << id;
1202 } else if (type == "method_handle") {
1203 } else if (type == "nullptr_string") {
1204 }
1205
1206 return ss.str();
1207 }
1208
GetFullMethodName(const panda_file::File::EntityId & methodId) const1209 std::string Disassembler::GetFullMethodName(const panda_file::File::EntityId &methodId) const
1210 {
1211 ark::panda_file::MethodDataAccessor methodAccessor(*file_, methodId);
1212
1213 const auto methodNameRaw = StringDataToString(file_->GetStringData(methodAccessor.GetNameId()));
1214
1215 std::string className = GetFullRecordName(methodAccessor.GetClassId());
1216 if (IsSystemType(className)) {
1217 className = "";
1218 } else {
1219 className += ".";
1220 }
1221
1222 return className + methodNameRaw;
1223 }
1224
GetMethodSignature(const panda_file::File::EntityId & methodId) const1225 std::string Disassembler::GetMethodSignature(const panda_file::File::EntityId &methodId) const
1226 {
1227 ark::panda_file::MethodDataAccessor methodAccessor(*file_, methodId);
1228
1229 pandasm::Function method(GetFullMethodName(methodId), fileLanguage_);
1230 GetParams(&method, methodAccessor.GetProtoId());
1231 GetMetaData(&method, methodId);
1232
1233 return pandasm::GetFunctionSignatureFromName(method.name, method.params);
1234 }
1235
GetFullRecordName(const panda_file::File::EntityId & classId) const1236 std::string Disassembler::GetFullRecordName(const panda_file::File::EntityId &classId) const
1237 {
1238 std::string name = StringDataToString(file_->GetStringData(classId));
1239
1240 auto type = pandasm::Type::FromDescriptor(name);
1241 type = pandasm::Type(type.GetComponentName(), type.GetRank());
1242
1243 return type.GetPandasmName();
1244 }
1245
1246 static constexpr size_t DEFAULT_OFFSET_WIDTH = 4;
1247
GetFieldInfo(const panda_file::FieldDataAccessor & fieldAccessor,std::stringstream & ss)1248 static void GetFieldInfo(const panda_file::FieldDataAccessor &fieldAccessor, std::stringstream &ss)
1249 {
1250 ss << "offset: 0x" << std::setfill('0') << std::setw(DEFAULT_OFFSET_WIDTH) << std::hex << fieldAccessor.GetFieldId()
1251 << ", type: 0x" << fieldAccessor.GetType();
1252 }
1253
GetFieldInfo(const panda_file::FieldDataAccessor & fieldAccessor)1254 static std::string GetFieldInfo(const panda_file::FieldDataAccessor &fieldAccessor)
1255 {
1256 std::stringstream ss;
1257 ss << "offset: 0x" << std::setfill('0') << std::setw(DEFAULT_OFFSET_WIDTH) << std::hex << fieldAccessor.GetFieldId()
1258 << ", type: 0x" << fieldAccessor.GetType();
1259 return ss.str();
1260 }
1261
GetRecordInfo(const panda_file::File::EntityId & recordId,RecordInfo * recordInfo) const1262 void Disassembler::GetRecordInfo(const panda_file::File::EntityId &recordId, RecordInfo *recordInfo) const
1263 {
1264 if (file_->IsExternal(recordId)) {
1265 return;
1266 }
1267
1268 panda_file::ClassDataAccessor classAccessor {*file_, recordId};
1269 std::stringstream ss;
1270
1271 ss << "offset: 0x" << std::setfill('0') << std::setw(DEFAULT_OFFSET_WIDTH) << std::hex << classAccessor.GetClassId()
1272 << ", size: 0x" << std::setfill('0') << std::setw(DEFAULT_OFFSET_WIDTH) << classAccessor.GetSize() << " ("
1273 << std::dec << classAccessor.GetSize() << ")";
1274
1275 recordInfo->recordInfo = ss.str();
1276 ss.str(std::string());
1277
1278 classAccessor.EnumerateFields([&](panda_file::FieldDataAccessor &fieldAccessor) -> void {
1279 GetFieldInfo(fieldAccessor, ss);
1280
1281 recordInfo->fieldsInfo.push_back(ss.str());
1282
1283 ss.str(std::string());
1284 });
1285 }
1286
GetMethodInfo(const panda_file::File::EntityId & methodId,MethodInfo * methodInfo) const1287 void Disassembler::GetMethodInfo(const panda_file::File::EntityId &methodId, MethodInfo *methodInfo) const
1288 {
1289 panda_file::MethodDataAccessor methodAccessor {*file_, methodId};
1290 std::stringstream ss;
1291
1292 ss << "offset: 0x" << std::setfill('0') << std::setw(DEFAULT_OFFSET_WIDTH) << std::hex
1293 << methodAccessor.GetMethodId();
1294
1295 if (methodAccessor.GetCodeId().has_value()) {
1296 ss << ", code offset: 0x" << std::setfill('0') << std::setw(DEFAULT_OFFSET_WIDTH) << std::hex
1297 << methodAccessor.GetCodeId().value();
1298
1299 GetInsInfo(methodAccessor, methodAccessor.GetCodeId().value(), methodInfo);
1300 } else {
1301 ss << ", <no code>";
1302 }
1303
1304 auto profileSize = methodAccessor.GetProfileSize();
1305 if (profileSize) {
1306 ss << ", profile size: " << profileSize.value();
1307 }
1308
1309 methodInfo->methodInfo = ss.str();
1310
1311 if (methodAccessor.GetCodeId()) {
1312 ASSERT(debugInfoExtractor_ != nullptr);
1313 methodInfo->lineNumberTable = debugInfoExtractor_->GetLineNumberTable(methodId);
1314 methodInfo->localVariableTable = debugInfoExtractor_->GetLocalVariableTable(methodId);
1315
1316 // Add information about parameters into the table
1317 panda_file::CodeDataAccessor codeda(*file_, methodAccessor.GetCodeId().value());
1318 auto argIdx = static_cast<int32_t>(codeda.GetNumVregs());
1319 uint32_t codeSize = codeda.GetCodeSize();
1320 for (const auto &info : debugInfoExtractor_->GetParameterInfo(methodId)) {
1321 panda_file::LocalVariableInfo argInfo {info.name, info.signature, "", argIdx++, 0, codeSize};
1322 methodInfo->localVariableTable.emplace_back(argInfo);
1323 }
1324 }
1325 }
1326
Serialize(const std::string & name,const pandasm::LiteralArray & litArray,std::ostream & os) const1327 void Disassembler::Serialize(const std::string &name, const pandasm::LiteralArray &litArray, std::ostream &os) const
1328 {
1329 if (litArray.literals.empty()) {
1330 return;
1331 }
1332
1333 bool isConst = litArray.literals[0].IsArray();
1334
1335 std::stringstream specifiers {};
1336
1337 if (isConst) {
1338 specifiers << LiteralTagToString(litArray.literals[0].tag) << " " << litArray.literals.size() << " ";
1339 }
1340
1341 os << ".array array_" << name << " " << specifiers.str() << "{";
1342
1343 SerializeValues(litArray, isConst, os);
1344
1345 os << "}\n";
1346 }
1347
LiteralTagToString(const panda_file::LiteralTag & tag) const1348 std::string Disassembler::LiteralTagToString(const panda_file::LiteralTag &tag) const
1349 {
1350 switch (tag) {
1351 case panda_file::LiteralTag::BOOL:
1352 case panda_file::LiteralTag::ARRAY_U1:
1353 return "u1";
1354 case panda_file::LiteralTag::ARRAY_U8:
1355 return "u8";
1356 case panda_file::LiteralTag::ARRAY_I8:
1357 return "i8";
1358 case panda_file::LiteralTag::ARRAY_U16:
1359 return "u16";
1360 case panda_file::LiteralTag::ARRAY_I16:
1361 return "i16";
1362 case panda_file::LiteralTag::ARRAY_U32:
1363 return "u32";
1364 case panda_file::LiteralTag::INTEGER:
1365 case panda_file::LiteralTag::ARRAY_I32:
1366 return "i32";
1367 case panda_file::LiteralTag::ARRAY_U64:
1368 return "u64";
1369 case panda_file::LiteralTag::BIGINT:
1370 case panda_file::LiteralTag::ARRAY_I64:
1371 return "i64";
1372 case panda_file::LiteralTag::FLOAT:
1373 case panda_file::LiteralTag::ARRAY_F32:
1374 return "f32";
1375 case panda_file::LiteralTag::DOUBLE:
1376 case panda_file::LiteralTag::ARRAY_F64:
1377 return "f64";
1378 case panda_file::LiteralTag::STRING:
1379 case panda_file::LiteralTag::ARRAY_STRING:
1380 return pandasm::Type::FromDescriptor(panda_file::GetStringClassDescriptor(fileLanguage_)).GetPandasmName();
1381 case panda_file::LiteralTag::ACCESSOR:
1382 return "accessor";
1383 case panda_file::LiteralTag::NULLVALUE:
1384 return "nullvalue";
1385 case panda_file::LiteralTag::METHODAFFILIATE:
1386 return "method_affiliate";
1387 case panda_file::LiteralTag::METHOD:
1388 return "method";
1389 case panda_file::LiteralTag::GENERATORMETHOD:
1390 return "generator_method";
1391 case panda_file::LiteralTag::LITERALARRAY:
1392 return "lit_offset";
1393 default:
1394 LOG(ERROR, DISASSEMBLER) << "Unsupported literal with tag 0x" << std::hex << static_cast<uint32_t>(tag);
1395 UNREACHABLE();
1396 }
1397 }
1398
SerializeLiterals(const pandasm::LiteralArray::Literal & lit) const1399 std::string Disassembler::SerializeLiterals(const pandasm::LiteralArray::Literal &lit) const
1400 {
1401 std::stringstream res {};
1402 const auto &val = lit.value;
1403 switch (lit.tag) {
1404 case panda_file::LiteralTag::BOOL: {
1405 res << (std::get<bool>(val));
1406 break;
1407 }
1408 case panda_file::LiteralTag::INTEGER: {
1409 res << (bit_cast<int32_t>(std::get<uint32_t>(val)));
1410 break;
1411 }
1412 case panda_file::LiteralTag::DOUBLE: {
1413 res << (std::get<double>(val));
1414 break;
1415 }
1416 case panda_file::LiteralTag::STRING: {
1417 res << "\"" << (std::get<std::string>(val)) << "\"";
1418 break;
1419 }
1420 case panda_file::LiteralTag::METHOD:
1421 case panda_file::LiteralTag::GENERATORMETHOD: {
1422 res << (std::get<std::string>(val));
1423 break;
1424 }
1425 case panda_file::LiteralTag::NULLVALUE:
1426 case panda_file::LiteralTag::ACCESSOR: {
1427 res << (static_cast<int16_t>(bit_cast<int8_t>(std::get<uint8_t>(val))));
1428 break;
1429 }
1430 case panda_file::LiteralTag::METHODAFFILIATE: {
1431 res << (std::get<uint16_t>(val));
1432 break;
1433 }
1434 case panda_file::LiteralTag::LITERALARRAY: {
1435 res << (std::get<std::string>(val));
1436 break;
1437 }
1438 default:
1439 UNREACHABLE();
1440 }
1441 res << ", ";
1442 return res.str();
1443 }
1444
LiteralValueToString(const pandasm::LiteralArray::Literal & lit) const1445 std::string Disassembler::LiteralValueToString(const pandasm::LiteralArray::Literal &lit) const
1446 {
1447 if (lit.IsBoolValue()) {
1448 std::stringstream res {};
1449 res << (std::get<bool>(lit.value));
1450 return res.str();
1451 }
1452
1453 if (lit.IsByteValue()) {
1454 return LiteralIntegralValueToString<uint8_t>(lit);
1455 }
1456
1457 if (lit.IsShortValue()) {
1458 return LiteralIntegralValueToString<uint16_t>(lit);
1459 }
1460
1461 if (lit.IsIntegerValue()) {
1462 return LiteralIntegralValueToString<uint32_t>(lit);
1463 }
1464
1465 if (lit.IsLongValue()) {
1466 return LiteralIntegralValueToString<uint64_t>(lit);
1467 }
1468
1469 if (lit.IsDoubleValue()) {
1470 std::stringstream res {};
1471 res << std::get<double>(lit.value);
1472 return res.str();
1473 }
1474
1475 if (lit.IsFloatValue()) {
1476 std::stringstream res {};
1477 res << std::get<float>(lit.value);
1478 return res.str();
1479 }
1480
1481 if (lit.IsStringValue()) {
1482 std::stringstream res {};
1483 res << "\"" << std::get<std::string>(lit.value) << "\"";
1484 return res.str();
1485 }
1486
1487 if (lit.IsLiteralArrayValue()) {
1488 return SerializeLiterals(lit);
1489 }
1490
1491 UNREACHABLE();
1492 }
1493
SerializeValues(const pandasm::LiteralArray & litArray,const bool isConst,std::ostream & os) const1494 void Disassembler::SerializeValues(const pandasm::LiteralArray &litArray, const bool isConst, std::ostream &os) const
1495 {
1496 std::string separator = (isConst) ? (" ") : ("\n");
1497
1498 os << separator;
1499
1500 if (isConst) {
1501 for (const auto &l : litArray.literals) {
1502 os << LiteralValueToString(l) << separator;
1503 }
1504 } else {
1505 for (const auto &l : litArray.literals) {
1506 os << "\t" << LiteralTagToString(l.tag) << " " << LiteralValueToString(l) << separator;
1507 }
1508 }
1509 }
1510
Serialize(const pandasm::Record & record,std::ostream & os,bool printInformation) const1511 void Disassembler::Serialize(const pandasm::Record &record, std::ostream &os, bool printInformation) const
1512 {
1513 if (IsSystemType(record.name)) {
1514 return;
1515 }
1516
1517 os << ".record " << record.name;
1518
1519 const auto recordIter = progAnn_.recordAnnotations.find(record.name);
1520 const bool recordInTable = recordIter != progAnn_.recordAnnotations.end();
1521 if (recordInTable) {
1522 Serialize(*record.metadata, recordIter->second.annList, os);
1523 } else {
1524 Serialize(*record.metadata, {}, os);
1525 }
1526
1527 if (record.metadata->IsForeign() && record.fieldList.empty()) {
1528 os << "\n\n";
1529 return;
1530 }
1531
1532 os << " {";
1533
1534 if (printInformation && progInfo_.recordsInfo.find(record.name) != progInfo_.recordsInfo.end()) {
1535 os << " # " << progInfo_.recordsInfo.at(record.name).recordInfo << "\n";
1536 SerializeFields(record, os, true);
1537 } else {
1538 os << "\n";
1539 SerializeFields(record, os, false);
1540 }
1541
1542 os << "}\n\n";
1543 }
1544
SerializeUnionFields(const pandasm::Record & record,std::ostream & os,bool printInformation) const1545 void Disassembler::SerializeUnionFields(const pandasm::Record &record, std::ostream &os, bool printInformation) const
1546 {
1547 if (printInformation && progInfo_.recordsInfo.find(record.name) != progInfo_.recordsInfo.end()) {
1548 os << " # " << progInfo_.recordsInfo.at(record.name).recordInfo << "\n";
1549 SerializeFields(record, os, true, true);
1550 } else {
1551 SerializeFields(record, os, false, true);
1552 }
1553 os << "\n";
1554 }
1555
DumpLiteralArray(const pandasm::LiteralArray & literalArray,std::stringstream & ss) const1556 void Disassembler::DumpLiteralArray(const pandasm::LiteralArray &literalArray, std::stringstream &ss) const
1557 {
1558 ss << "[";
1559 bool firstItem = true;
1560 for (const auto &item : literalArray.literals) {
1561 if (!firstItem) {
1562 ss << ", ";
1563 } else {
1564 firstItem = false;
1565 }
1566
1567 switch (item.tag) {
1568 case panda_file::LiteralTag::INTEGER: {
1569 ss << std::get<uint32_t>(item.value); // CC-OFF(G.EXP.30-CPP) false positive
1570 break;
1571 }
1572 case panda_file::LiteralTag::DOUBLE: {
1573 ss << std::get<double>(item.value);
1574 break;
1575 }
1576 case panda_file::LiteralTag::BOOL: {
1577 ss << std::get<bool>(item.value);
1578 break;
1579 }
1580 case panda_file::LiteralTag::STRING: {
1581 ss << "\"" << std::get<std::string>(item.value) << "\"";
1582 break;
1583 }
1584 case panda_file::LiteralTag::LITERALARRAY: {
1585 std::string offsetStr = std::get<std::string>(item.value);
1586 const int hexBase = 16;
1587 uint32_t litArrayOffset = std::stoi(offsetStr, nullptr, hexBase);
1588 pandasm::LiteralArray litArray;
1589 GetLiteralArrayByOffset(&litArray, panda_file::File::EntityId(litArrayOffset));
1590 DumpLiteralArray(litArray, ss);
1591 break;
1592 }
1593 default: {
1594 UNREACHABLE();
1595 break;
1596 }
1597 }
1598 }
1599 ss << "]";
1600 }
1601
SerializeFieldValue(const pandasm::Field & f,std::stringstream & ss) const1602 void Disassembler::SerializeFieldValue(const pandasm::Field &f, std::stringstream &ss) const
1603 {
1604 if (f.type.GetId() == panda_file::Type::TypeId::U32) {
1605 ss << " = 0x" << std::hex << f.metadata->GetValue().value().GetValue<uint32_t>();
1606 } else if (f.type.GetId() == panda_file::Type::TypeId::U8) {
1607 ss << " = 0x" << std::hex << static_cast<uint32_t>(f.metadata->GetValue().value().GetValue<uint8_t>());
1608 } else if (f.type.GetId() == panda_file::Type::TypeId::F64) {
1609 ss << " = " << static_cast<double>(f.metadata->GetValue().value().GetValue<double>());
1610 } else if (f.type.GetId() == panda_file::Type::TypeId::U1) {
1611 ss << " = " << static_cast<bool>(f.metadata->GetValue().value().GetValue<bool>());
1612 } else if (f.type.GetId() == panda_file::Type::TypeId::I32) {
1613 ss << " = " << static_cast<bool>(f.metadata->GetValue().value().GetValue<int>());
1614 } else if (f.type.GetId() == panda_file::Type::TypeId::REFERENCE && f.type.GetName() == "std/core/String") {
1615 ss << " = \"" << static_cast<std::string>(f.metadata->GetValue().value().GetValue<std::string>()) << "\"";
1616 } else if (f.type.GetRank() > 0) {
1617 uint32_t litArrayOffset =
1618 std::stoi(static_cast<std::string>(f.metadata->GetValue().value().GetValue<std::string>()));
1619 pandasm::LiteralArray litArray;
1620 GetLiteralArrayByOffset(&litArray, panda_file::File::EntityId(litArrayOffset));
1621 ss << " = ";
1622 DumpLiteralArray(litArray, ss);
1623 }
1624 }
1625
SerializeFields(const pandasm::Record & record,std::ostream & os,bool printInformation,bool isUnion) const1626 void Disassembler::SerializeFields(const pandasm::Record &record, std::ostream &os, bool printInformation,
1627 bool isUnion) const
1628 {
1629 constexpr size_t INFO_OFFSET = 80;
1630
1631 const auto recordIter = progAnn_.recordAnnotations.find(record.name);
1632 const bool recordInTable = recordIter != progAnn_.recordAnnotations.end();
1633
1634 const auto recInf = (printInformation) ? (progInfo_.recordsInfo.at(record.name)) : (RecordInfo {});
1635
1636 size_t fieldIdx = 0;
1637
1638 std::stringstream ss;
1639 for (const auto &f : record.fieldList) {
1640 if (isUnion) {
1641 ss << ".union_field ";
1642 } else {
1643 ss << "\t";
1644 }
1645 ss << f.type.GetPandasmName() << " " << f.name;
1646 if (f.metadata->GetValue().has_value()) {
1647 SerializeFieldValue(f, ss);
1648 }
1649 if (!isUnion && recordInTable) {
1650 const auto fieldIter = recordIter->second.fieldAnnotations.find(f.name);
1651 if (fieldIter != recordIter->second.fieldAnnotations.end()) {
1652 Serialize(*f.metadata, fieldIter->second, ss);
1653 } else {
1654 Serialize(*f.metadata, {}, ss);
1655 }
1656 } else if (!isUnion && !recordInTable) {
1657 Serialize(*f.metadata, {}, ss);
1658 }
1659
1660 if (printInformation) {
1661 os << std::setw(INFO_OFFSET) << std::left << ss.str() << " # " << recInf.fieldsInfo.at(fieldIdx) << "\n";
1662 } else {
1663 os << ss.str() << "\n";
1664 }
1665
1666 ss.str(std::string());
1667 ss.clear();
1668
1669 fieldIdx++;
1670 }
1671 }
1672
Serialize(const pandasm::Function::CatchBlock & catchBlock,std::ostream & os) const1673 void Disassembler::Serialize(const pandasm::Function::CatchBlock &catchBlock, std::ostream &os) const
1674 {
1675 if (catchBlock.exceptionRecord.empty()) {
1676 os << ".catchall ";
1677 } else {
1678 os << ".catch " << catchBlock.exceptionRecord << ", ";
1679 }
1680
1681 os << catchBlock.tryBeginLabel << ", " << catchBlock.tryEndLabel << ", " << catchBlock.catchBeginLabel;
1682
1683 if (!catchBlock.catchEndLabel.empty()) {
1684 os << ", " << catchBlock.catchEndLabel;
1685 }
1686 }
1687
Serialize(const pandasm::ItemMetadata & meta,const AnnotationList & annList,std::ostream & os) const1688 void Disassembler::Serialize(const pandasm::ItemMetadata &meta, const AnnotationList &annList, std::ostream &os) const
1689 {
1690 auto boolAttributes = meta.GetBoolAttributes();
1691 auto attributes = meta.GetAttributes();
1692 if (boolAttributes.empty() && attributes.empty() && annList.empty()) {
1693 return;
1694 }
1695
1696 os << " <";
1697
1698 size_t size = boolAttributes.size();
1699 size_t idx = 0;
1700 for (const auto &attr : boolAttributes) {
1701 os << attr;
1702 ++idx;
1703
1704 if (!attributes.empty() || !annList.empty() || idx < size) {
1705 os << ", ";
1706 }
1707 }
1708
1709 size = attributes.size();
1710 idx = 0;
1711 for (const auto &[key, values] : attributes) {
1712 for (size_t i = 0; i < values.size(); i++) {
1713 os << key << "=" << values[i];
1714
1715 if (i < values.size() - 1) {
1716 os << ", ";
1717 }
1718 }
1719
1720 ++idx;
1721
1722 if (!annList.empty() || idx < size) {
1723 os << ", ";
1724 }
1725 }
1726
1727 size = annList.size();
1728 idx = 0;
1729 for (const auto &[key, value] : annList) {
1730 os << key << "=" << value;
1731
1732 ++idx;
1733
1734 if (idx < size) {
1735 os << ", ";
1736 }
1737 }
1738
1739 os << ">";
1740 }
1741
SerializeLineNumberTable(const panda_file::LineNumberTable & lineNumberTable,std::ostream & os) const1742 void Disassembler::SerializeLineNumberTable(const panda_file::LineNumberTable &lineNumberTable, std::ostream &os) const
1743 {
1744 if (lineNumberTable.empty()) {
1745 return;
1746 }
1747
1748 os << "\n# LINE_NUMBER_TABLE:\n";
1749 for (const auto &lineInfo : lineNumberTable) {
1750 os << "#\tline " << lineInfo.line << ": " << lineInfo.offset << "\n";
1751 }
1752 }
1753
SerializeLocalVariableTable(const panda_file::LocalVariableTable & localVariableTable,const pandasm::Function & method,std::ostream & os) const1754 void Disassembler::SerializeLocalVariableTable(const panda_file::LocalVariableTable &localVariableTable,
1755 const pandasm::Function &method, std::ostream &os) const
1756 {
1757 if (localVariableTable.empty()) {
1758 return;
1759 }
1760
1761 os << "\n# LOCAL_VARIABLE_TABLE:\n";
1762 os << "#\t Start End Register Name Signature\n";
1763 const int startWidth = 5;
1764 const int endWidth = 4;
1765 const int regWidth = 8;
1766 const int nameWidth = 14;
1767 for (const auto &variableInfo : localVariableTable) {
1768 std::ostringstream regStream;
1769 regStream << variableInfo.regNumber << '(';
1770 if (variableInfo.regNumber < 0) {
1771 regStream << "acc";
1772 } else {
1773 uint32_t vreg = variableInfo.regNumber;
1774 uint32_t firstArgReg = method.GetTotalRegs();
1775 if (vreg < firstArgReg) {
1776 regStream << 'v' << vreg;
1777 } else {
1778 regStream << 'a' << vreg - firstArgReg;
1779 }
1780 }
1781 regStream << ')';
1782
1783 os << "#\t " << std::setw(startWidth) << std::right << variableInfo.startOffset << " ";
1784 os << std::setw(endWidth) << std::right << variableInfo.endOffset << " ";
1785 os << std::setw(regWidth) << std::right << regStream.str() << " ";
1786 os << std::setw(nameWidth) << std::right << variableInfo.name << " " << variableInfo.type;
1787 if (!variableInfo.typeSignature.empty() && variableInfo.typeSignature != variableInfo.type) {
1788 os << " (" << variableInfo.typeSignature << ")";
1789 }
1790 os << "\n";
1791 }
1792 }
1793
SerializeLanguage(std::ostream & os) const1794 void Disassembler::SerializeLanguage(std::ostream &os) const
1795 {
1796 os << ".language " << ark::panda_file::LanguageToString(fileLanguage_) << "\n\n";
1797 }
1798
SerializeFilename(std::ostream & os) const1799 void Disassembler::SerializeFilename(std::ostream &os) const
1800 {
1801 if (file_ == nullptr || file_->GetFilename().empty()) {
1802 return;
1803 }
1804
1805 os << "# source binary: " << file_->GetFilename() << "\n\n";
1806 }
1807
SerializeLitArrays(std::ostream & os,bool addSeparators) const1808 void Disassembler::SerializeLitArrays(std::ostream &os, bool addSeparators) const
1809 {
1810 LOG(DEBUG, DISASSEMBLER) << "[serializing literals]";
1811
1812 if (prog_.literalarrayTable.empty()) {
1813 return;
1814 }
1815
1816 if (addSeparators) {
1817 os << "# ====================\n"
1818 "# LITERALS\n\n";
1819 }
1820
1821 for (const auto &pair : prog_.literalarrayTable) {
1822 Serialize(pair.first, pair.second, os);
1823 }
1824
1825 os << "\n";
1826 }
1827
SerializeRecords(std::ostream & os,bool addSeparators,bool printInformation) const1828 void Disassembler::SerializeRecords(std::ostream &os, bool addSeparators, bool printInformation) const
1829 {
1830 LOG(DEBUG, DISASSEMBLER) << "[serializing records]";
1831
1832 if (prog_.recordTable.empty()) {
1833 return;
1834 }
1835
1836 if (addSeparators) {
1837 os << "# ====================\n"
1838 "# RECORDS\n\n";
1839 }
1840
1841 for (const auto &r : prog_.recordTable) {
1842 if (!panda_file::IsDummyClassName(r.first)) {
1843 Serialize(r.second, os, printInformation);
1844 } else {
1845 SerializeUnionFields(r.second, os, printInformation);
1846 }
1847 }
1848 }
1849
SerializeMethods(std::ostream & os,bool addSeparators,bool printInformation) const1850 void Disassembler::SerializeMethods(std::ostream &os, bool addSeparators, bool printInformation) const
1851 {
1852 LOG(DEBUG, DISASSEMBLER) << "[serializing methods]";
1853
1854 if (prog_.functionTable.empty()) {
1855 return;
1856 }
1857
1858 if (addSeparators) {
1859 os << "# ====================\n"
1860 "# METHODS\n\n";
1861 }
1862
1863 for (const auto &m : prog_.functionTable) {
1864 Serialize(m.second, os, printInformation);
1865 }
1866 }
1867
BytecodeOpcodeToPandasmOpcode(uint8_t o) const1868 pandasm::Opcode Disassembler::BytecodeOpcodeToPandasmOpcode(uint8_t o) const
1869 {
1870 return BytecodeOpcodeToPandasmOpcode(BytecodeInstruction::Opcode(o));
1871 }
1872
IDToString(BytecodeInstruction bcIns,panda_file::File::EntityId methodId) const1873 std::string Disassembler::IDToString(BytecodeInstruction bcIns, panda_file::File::EntityId methodId) const
1874 {
1875 std::stringstream name;
1876
1877 if (bcIns.HasFlag(BytecodeInstruction::Flags::TYPE_ID)) {
1878 auto idx = bcIns.GetId().AsIndex();
1879 auto id = file_->ResolveClassIndex(methodId, idx);
1880 auto type = pandasm::Type::FromDescriptor(StringDataToString(file_->GetStringData(id)));
1881
1882 name.str("");
1883 name << type.GetPandasmName();
1884 } else if (bcIns.HasFlag(BytecodeInstruction::Flags::METHOD_ID)) {
1885 auto idx = bcIns.GetId().AsIndex();
1886 auto id = file_->ResolveMethodIndex(methodId, idx);
1887
1888 name << GetMethodSignature(id);
1889 } else if (bcIns.HasFlag(BytecodeInstruction::Flags::STRING_ID)) {
1890 name << '\"';
1891
1892 if (skipStrings_ || quiet_) {
1893 name << std::hex << "0x" << bcIns.GetId().AsFileId();
1894 } else {
1895 name << StringDataToString(file_->GetStringData(bcIns.GetId().AsFileId()));
1896 }
1897
1898 name << '\"';
1899 } else if (bcIns.HasFlag(BytecodeInstruction::Flags::FIELD_ID)) {
1900 auto idx = bcIns.GetId().AsIndex();
1901 auto id = file_->ResolveFieldIndex(methodId, idx);
1902 panda_file::FieldDataAccessor fieldAccessor(*file_, id);
1903
1904 auto recordName = GetFullRecordName(fieldAccessor.GetClassId());
1905 if (!panda_file::IsDummyClassName(recordName)) {
1906 name << recordName;
1907 name << '.';
1908 }
1909 name << StringDataToString(file_->GetStringData(fieldAccessor.GetNameId()));
1910 } else if (bcIns.HasFlag(BytecodeInstruction::Flags::LITERALARRAY_ID)) {
1911 auto index = bcIns.GetId().AsIndex();
1912 name << "array_" << index;
1913 }
1914
1915 return name.str();
1916 }
1917
GetRecordLanguage(panda_file::File::EntityId classId) const1918 ark::panda_file::SourceLang Disassembler::GetRecordLanguage(panda_file::File::EntityId classId) const
1919 {
1920 if (file_->IsExternal(classId)) {
1921 return ark::panda_file::SourceLang::PANDA_ASSEMBLY;
1922 }
1923
1924 panda_file::ClassDataAccessor cda(*file_, classId);
1925 return cda.GetSourceLang().value_or(panda_file::SourceLang::PANDA_ASSEMBLY);
1926 }
1927
1928 // CC-OFFNXT(G.FUN.01) solid logic
TranslateImmToLabel(pandasm::Ins * paIns,LabelTable * labelTable,const uint8_t * insArr,BytecodeInstruction bcIns,BytecodeInstruction bcInsLast,panda_file::File::EntityId codeId)1929 static void TranslateImmToLabel(pandasm::Ins *paIns, LabelTable *labelTable, const uint8_t *insArr,
1930 BytecodeInstruction bcIns, BytecodeInstruction bcInsLast,
1931 panda_file::File::EntityId codeId)
1932 {
1933 const int32_t jmpOffset = std::get<int64_t>(paIns->imms.at(0));
1934 const auto bcInsDest = bcIns.JumpTo(jmpOffset);
1935 if (bcInsLast.GetAddress() > bcInsDest.GetAddress()) {
1936 size_t idx = GetBytecodeInstructionNumber(BytecodeInstruction(insArr), bcInsDest);
1937 if (idx != std::numeric_limits<size_t>::max()) {
1938 if (labelTable->find(idx) == labelTable->end()) {
1939 std::stringstream ss;
1940 ss << "jump_label_" << labelTable->size();
1941 (*labelTable)[idx] = ss.str();
1942 }
1943
1944 paIns->imms.clear();
1945 paIns->ids.push_back(labelTable->at(idx));
1946 } else {
1947 LOG(ERROR, DISASSEMBLER) << "> error encountered at " << codeId << " (0x" << std::hex << codeId
1948 << "). incorrect instruction at offset: 0x" << (bcIns.GetAddress() - insArr)
1949 << ": invalid jump offset 0x" << jmpOffset
1950 << " - jumping in the middle of another instruction!";
1951 }
1952 } else {
1953 LOG(ERROR, DISASSEMBLER) << "> error encountered at " << codeId << " (0x" << std::hex << codeId
1954 << "). incorrect instruction at offset: 0x" << (bcIns.GetAddress() - insArr)
1955 << ": invalid jump offset 0x" << jmpOffset << " - jumping out of bounds!";
1956 }
1957 }
1958
CollectExternalFields(const panda_file::FieldDataAccessor & fieldAccessor)1959 void Disassembler::CollectExternalFields(const panda_file::FieldDataAccessor &fieldAccessor)
1960 {
1961 auto recordName = GetFullRecordName(fieldAccessor.GetClassId());
1962
1963 pandasm::Field field(fileLanguage_);
1964 GetField(field, fieldAccessor);
1965 if (field.name.empty()) {
1966 return;
1967 }
1968
1969 auto &fieldList = externalFieldTable_[recordName];
1970 auto retField = std::find_if(fieldList.begin(), fieldList.end(),
1971 [&field](pandasm::Field &fieldFromList) { return field.name == fieldFromList.name; });
1972 if (retField == fieldList.end()) {
1973 fieldList.emplace_back(std::move(field));
1974
1975 externalFieldsInfoTable_[recordName].emplace_back(GetFieldInfo(fieldAccessor));
1976 }
1977 }
1978
GetInstructions(pandasm::Function * method,panda_file::File::EntityId methodId,panda_file::File::EntityId codeId)1979 IdList Disassembler::GetInstructions(pandasm::Function *method, panda_file::File::EntityId methodId,
1980 panda_file::File::EntityId codeId)
1981 {
1982 panda_file::CodeDataAccessor codeAccessor(*file_, codeId);
1983
1984 const auto insSz = codeAccessor.GetCodeSize();
1985 const auto insArr = codeAccessor.GetInstructions();
1986
1987 method->regsNum = codeAccessor.GetNumVregs();
1988
1989 auto bcIns = BytecodeInstruction(insArr);
1990 auto from = bcIns.GetAddress();
1991 const auto bcInsLast = bcIns.JumpTo(insSz);
1992
1993 LabelTable labelTable = GetExceptions(method, methodId, codeId);
1994
1995 IdList unknownExternalMethods {};
1996
1997 while (bcIns.GetAddress() != bcInsLast.GetAddress()) {
1998 if (bcIns.GetAddress() > bcInsLast.GetAddress()) {
1999 LOG(ERROR, DISASSEMBLER) << "> error encountered at " << codeId << " (0x" << std::hex << codeId
2000 << "). bytecode instructions sequence corrupted for method " << method->name
2001 << "! went out of bounds";
2002
2003 break;
2004 }
2005
2006 if (bcIns.HasFlag(BytecodeInstruction::Flags::FIELD_ID)) {
2007 auto idx = bcIns.GetId().AsIndex();
2008 auto id = file_->ResolveFieldIndex(methodId, idx);
2009 panda_file::FieldDataAccessor fieldAccessor(*file_, id);
2010
2011 if (fieldAccessor.IsExternal()) {
2012 CollectExternalFields(fieldAccessor);
2013 }
2014 }
2015
2016 auto paIns = BytecodeInstructionToPandasmInstruction(bcIns, methodId);
2017 paIns.insDebug.boundLeft =
2018 bcIns.GetAddress() - from; // It is used to produce a line table during method serialization
2019 if (paIns.IsJump()) {
2020 TranslateImmToLabel(&paIns, &labelTable, insArr, bcIns, bcInsLast, codeId);
2021 }
2022
2023 // check if method id is unknown external method. if so, emplace it in table
2024 if (bcIns.HasFlag(BytecodeInstruction::Flags::METHOD_ID)) {
2025 const auto argMethodIdx = bcIns.GetId().AsIndex();
2026 const auto argMethodId = file_->ResolveMethodIndex(methodId, argMethodIdx);
2027
2028 const auto argMethodSignature = GetMethodSignature(argMethodId);
2029
2030 const bool isPresent = prog_.functionTable.find(argMethodSignature) != prog_.functionTable.cend();
2031 const bool isExternal = file_->IsExternal(argMethodId);
2032 if (isExternal && !isPresent) {
2033 unknownExternalMethods.push_back(argMethodId);
2034 }
2035 }
2036
2037 method->ins.push_back(paIns);
2038 bcIns = bcIns.GetNext();
2039 }
2040
2041 for (const auto &pair : labelTable) {
2042 method->ins[pair.first].label = pair.second;
2043 method->ins[pair.first].setLabel = true;
2044 }
2045
2046 return unknownExternalMethods;
2047 }
2048
2049 } // namespace ark::disasm
2050