• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (c) 2023 Huawei Device Co., Ltd.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at
6  *
7  * http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 #include <codecvt>
16 #include <locale>
17 
18 #include "verifier.h"
19 #include "class_data_accessor-inl.h"
20 #include "libpandafile/util/collect_util.h"
21 #include "zlib.h"
22 
23 namespace panda::verifier {
24 
Verifier(const std::string & filename)25 Verifier::Verifier(const std::string &filename)
26 {
27     auto file_to_verify = panda_file::File::Open(filename);
28     file_.swap(file_to_verify);
29 }
30 
Verify()31 bool Verifier::Verify()
32 {
33     if (!VerifyChecksum()) {
34         return false;
35     }
36 
37     if (!CollectIdInfos()) {
38         return false;
39     }
40 
41     if (!VerifyConstantPool()) {
42         return false;
43     }
44 
45     return true;
46 }
47 
CollectIdInfos()48 bool Verifier::CollectIdInfos()
49 {
50     if (file_ == nullptr) {
51         LOG(ERROR, VERIFIER) << "Failed to verify empty abc file!";
52         return false;
53     }
54     GetConstantPoolIds();
55     if (include_literal_array_ids) {
56         GetLiteralIds();
57     }
58     return CheckConstantPool(verifier::ActionType::COLLECTINFOS);
59 }
60 
VerifyChecksum()61 bool Verifier::VerifyChecksum()
62 {
63     if (file_ == nullptr) {
64         LOG(ERROR, VERIFIER) << "Failed to verify empty abc file!";
65         return false;
66     }
67     uint32_t file_size = file_->GetHeader()->file_size;
68     ASSERT(file_size > FILE_CONTENT_OFFSET);
69     uint32_t cal_checksum = adler32(1, file_->GetBase() + FILE_CONTENT_OFFSET, file_size - FILE_CONTENT_OFFSET);
70     return file_->GetHeader()->checksum == cal_checksum;
71 }
72 
VerifyConstantPool()73 bool Verifier::VerifyConstantPool()
74 {
75     if (file_ == nullptr) {
76         LOG(ERROR, VERIFIER) << "Failed to verify empty abc file!";
77         return false;
78     }
79 
80     if (!CheckConstantPoolIndex()) {
81         return false;
82     }
83 
84     if (!CheckConstantPool(verifier::ActionType::CHECKCONSTPOOLCONTENT)) {
85         return false;
86     }
87 
88     if (!VerifyLiteralArrays()) {
89         return false;
90     }
91 
92     return true;
93 }
94 
VerifyRegisterIndex()95 bool Verifier::VerifyRegisterIndex()
96 {
97     if (file_ == nullptr) {
98         LOG(ERROR, VERIFIER) << "Failed to verify empty abc file!";
99         return false;
100     }
101 
102     for (const auto id : all_method_ids_) {
103         const panda_file::File::EntityId method_id = panda_file::File::EntityId(id);
104         panda_file::MethodDataAccessor method_accessor {*file_, method_id};
105         if (!method_accessor.GetCodeId().has_value()) {
106             continue;
107         }
108         panda_file::CodeDataAccessor code_data(*file_, method_accessor.GetCodeId().value());
109         const uint64_t reg_nums = code_data.GetNumVregs();
110         const uint64_t arg_nums = code_data.GetNumArgs();
111         const std::optional<uint64_t> valid_regs_num = SafeAdd(reg_nums, arg_nums);
112         if (!valid_regs_num.has_value()) {
113             LOG(ERROR, VERIFIER) << "Integer overflow detected during register index calculation!";
114             return false;
115         }
116         if (valid_regs_num.value() > MAX_REGISTER_INDEX + 1) {
117             LOG(ERROR, VERIFIER) << "Register index exceeds the maximum allowable value (0xffff)!";
118             return false;
119         }
120         auto bc_ins = BytecodeInstruction(code_data.GetInstructions());
121         const auto bc_ins_last = bc_ins.JumpTo(code_data.GetCodeSize());
122         ASSERT(arg_nums >= DEFAULT_ARGUMENT_NUMBER);
123         while (bc_ins.GetAddress() < bc_ins_last.GetAddress()) {
124             const size_t count = GetVRegCount(bc_ins);
125             if (count == 0) { // Skip instructions that do not use registers
126                 bc_ins = bc_ins.GetNext();
127                 continue;
128             }
129             if (!CheckVRegIdx(bc_ins, count, valid_regs_num.value())) {
130                 return false;
131             }
132             bc_ins = bc_ins.GetNext();
133         }
134     }
135     return true;
136 }
137 
VerifyConstantPoolIndex()138 bool Verifier::VerifyConstantPoolIndex()
139 {
140     if (file_ == nullptr) {
141         LOG(ERROR, VERIFIER) << "Failed to verify empty abc file!";
142         return false;
143     }
144 
145     if (!CheckConstantPoolIndex()) {
146         return false;
147     }
148 
149     return true;
150 }
151 
VerifyConstantPoolContent()152 bool Verifier::VerifyConstantPoolContent()
153 {
154     if (file_ == nullptr) {
155         LOG(ERROR, VERIFIER) << "Failed to verify empty abc file!";
156         return false;
157     }
158 
159     if (!CheckConstantPool(verifier::ActionType::CHECKCONSTPOOLCONTENT)) {
160         return false;
161     }
162 
163     if (!VerifyLiteralArrays()) {
164         return false;
165     }
166 
167     return true;
168 }
169 
GetConstantPoolIds()170 void Verifier::GetConstantPoolIds()
171 {
172     if (constant_pool_ids_.size() != 0) {
173         return;
174     }
175     auto index_headers = file_->GetIndexHeaders();
176     for (const auto &index_header : index_headers) {
177         auto region_indexs = file_->GetMethodIndex(&index_header);
178         for (auto &index : region_indexs) {
179             constant_pool_ids_.push_back(index.GetOffset());
180         }
181     }
182 }
183 
GetLiteralIds()184 void Verifier::GetLiteralIds()
185 {
186     if (literal_ids_.size() != 0) {
187         return;
188     }
189 
190     if (panda_file::ContainsLiteralArrayInHeader(file_->GetHeader()->version)) {
191         const auto literal_arrays = file_->GetLiteralArrays();
192         PushToLiteralIds(literal_arrays);
193     } else {
194         panda::libpandafile::CollectUtil collect_util;
195         std::unordered_set<uint32_t> literal_array_ids;
196         collect_util.CollectLiteralArray(*file_, literal_array_ids);
197         PushToLiteralIds(literal_array_ids);
198     }
199 }
200 
201 template <typename T>
PushToLiteralIds(T & ids)202 void Verifier::PushToLiteralIds(T &ids)
203 {
204     for (const auto id : ids) {
205         literal_ids_.push_back(id);
206     }
207 }
208 
CheckConstantPoolActions(const verifier::ActionType type,panda_file::File::EntityId method_id)209 bool Verifier::CheckConstantPoolActions(const verifier::ActionType type, panda_file::File::EntityId method_id)
210 {
211     switch (type) {
212         case verifier::ActionType::CHECKCONSTPOOLCONTENT: {
213             return CheckConstantPoolMethodContent(method_id);
214         }
215         case verifier::ActionType::COLLECTINFOS: {
216             all_method_ids_.push_back(method_id.GetOffset());
217             return CollectIdInInstructions(method_id);
218         }
219         default: {
220             return true;
221         }
222     }
223 }
224 
CollectIdInInstructions(const panda_file::File::EntityId & method_id)225 bool Verifier::CollectIdInInstructions(const panda_file::File::EntityId &method_id)
226 {
227     panda_file::MethodDataAccessor method_accessor(*file_, method_id);
228     if (!method_accessor.GetCodeId().has_value()) {
229         return false;
230     }
231     panda_file::CodeDataAccessor code_accessor(*file_, method_accessor.GetCodeId().value());
232     const auto ins_size = code_accessor.GetCodeSize();
233     const auto ins_arr = code_accessor.GetInstructions();
234 
235     auto bc_ins = BytecodeInstruction(ins_arr);
236     const auto bc_ins_last = bc_ins.JumpTo(ins_size);
237 
238     while (bc_ins.GetAddress() < bc_ins_last.GetAddress()) {
239         if (!bc_ins.IsPrimaryOpcodeValid()) {
240             LOG(ERROR, VERIFIER) << "Fail to verify primary opcode!";
241             return false;
242         }
243         if (bc_ins.HasFlag(BytecodeInstruction::Flags::LITERALARRAY_ID)) {
244             // the idx of any instruction with a literal id is 0
245             // except defineclasswithbuffer/callruntime.definesendableclass
246             size_t idx = bc_ins.GetLiteralIndex();
247             const auto arg_literal_idx = bc_ins.GetId(idx).AsIndex();
248             const auto literal_id = file_->ResolveMethodIndex(method_id, arg_literal_idx);
249             ins_literal_ids_.insert(literal_id.GetOffset());
250         }
251         if (bc_ins.HasFlag(BytecodeInstruction::Flags::METHOD_ID)) {
252             const auto arg_method_idx = bc_ins.GetId().AsIndex();
253             const auto arg_method_id = file_->ResolveMethodIndex(method_id, arg_method_idx);
254             ins_method_ids_.insert(arg_method_id.GetOffset());
255         }
256         if (bc_ins.HasFlag(BytecodeInstruction::Flags::STRING_ID)) {
257             const auto arg_string_idx = bc_ins.GetId().AsIndex();
258             const auto string_id = file_->ResolveOffsetByIndex(method_id, arg_string_idx);
259             ins_string_ids_.insert(string_id.GetOffset());
260         }
261         bc_ins = bc_ins.GetNext();
262     }
263     return true;
264 }
265 
CollectModuleLiteralId(const panda_file::File::EntityId & field_id)266 void Verifier::CollectModuleLiteralId(const panda_file::File::EntityId &field_id)
267 {
268     panda_file::FieldDataAccessor field_accessor(*file_, field_id);
269     const auto literal_id = field_accessor.GetValue<uint32_t>().value();
270     if (std::find(literal_ids_.begin(), literal_ids_.end(), literal_id) != literal_ids_.end()) {
271         module_literals_.insert(literal_id);
272     }
273 }
274 
CheckConstantPool(const verifier::ActionType type)275 bool Verifier::CheckConstantPool(const verifier::ActionType type)
276 {
277     const auto class_idx = file_->GetClasses();
278     for (size_t i = 0; i < class_idx.size(); i++) {
279         uint32_t class_id = class_idx[i];
280         if (class_id > file_->GetHeader()->file_size) {
281             LOG(ERROR, VERIFIER) << "Binary file corrupted. out of bounds (0x" << std::hex
282                                  << file_->GetHeader()->file_size;
283             return false;
284         }
285         const panda_file::File::EntityId record_id {class_id};
286         if (!file_->IsExternal(record_id)) {
287             panda_file::ClassDataAccessor class_accessor {*file_, record_id};
288             bool check_res = true;
289             class_accessor.EnumerateMethods([&](panda_file::MethodDataAccessor &method_accessor) -> void {
290                 check_res = check_res && CheckConstantPoolActions(type, method_accessor.GetMethodId());
291             });
292             if (!check_res) {
293                 return false;
294             }
295             if (type == verifier::ActionType::COLLECTINFOS) {
296                 class_accessor.EnumerateFields([&](panda_file::FieldDataAccessor &field_accessor) -> void {
297                     CollectModuleLiteralId(field_accessor.GetFieldId());
298                 });
299             }
300         }
301     }
302 
303     return true;
304 }
305 
GetVRegCount(const BytecodeInstruction & bc_ins)306 size_t Verifier::GetVRegCount(const BytecodeInstruction &bc_ins)
307 {
308     size_t idx = 0; // Represents the idxTH register index in an instruction
309     BytecodeInstruction::Format format = bc_ins.GetFormat();
310     while (bc_ins.HasVReg(format, idx)) {
311         idx++;
312     }
313     return idx;
314 }
315 
IsRangeInstAndHasInvalidRegIdx(const BytecodeInstruction & bc_ins,const size_t count,uint64_t valid_regs_num)316 bool Verifier::IsRangeInstAndHasInvalidRegIdx(const BytecodeInstruction &bc_ins,
317                                               const size_t count, uint64_t valid_regs_num)
318 {
319     ASSERT(bc_ins.IsRangeInstruction());
320 
321     uint64_t reg_idx = bc_ins.GetVReg(FIRST_INDEX);
322     if (IsRegIdxOutOfBounds(reg_idx, valid_regs_num)) { // for [format: +AA/+AAAA vBB vCC], vBB can be verified here
323         return true;
324     }
325 
326     std::optional<uint64_t> max_ins_reg_idx_opt = bc_ins.GetRangeInsLastRegIdx();
327     if (!max_ins_reg_idx_opt.has_value()) {
328         LOG(ERROR, VERIFIER) << "Integer overflow detected during register index calculation!";
329         return true;
330     }
331 
332     reg_idx = max_ins_reg_idx_opt.value();
333     if (IsRegIdxOutOfBounds(reg_idx, valid_regs_num)) {
334         return true;
335     }
336 
337     return false;
338 }
339 
IsRegIdxOutOfBounds(uint64_t reg_idx,uint64_t valid_regs_num)340 bool Verifier::IsRegIdxOutOfBounds(uint64_t reg_idx, uint64_t valid_regs_num)
341 {
342     if (reg_idx >= valid_regs_num) {
343         LOG(ERROR, VERIFIER) << "Register index out of bounds: 0x" << std::hex
344                              << reg_idx << ", Max allowed: 0x" << std::hex << valid_regs_num;
345         return true;
346     }
347     return false;
348 }
349 
CheckVRegIdx(const BytecodeInstruction & bc_ins,const size_t count,uint64_t valid_regs_num)350 bool Verifier::CheckVRegIdx(const BytecodeInstruction &bc_ins, const size_t count, uint64_t valid_regs_num)
351 {
352     if (bc_ins.IsRangeInstruction() &&
353         IsRangeInstAndHasInvalidRegIdx(bc_ins, count, valid_regs_num)) {
354         return false;
355     }
356     for (size_t idx = 0; idx < count; idx++) { // Represents the idxTH register index in an instruction
357         uint16_t reg_idx = bc_ins.GetVReg(idx);
358         if (reg_idx >= valid_regs_num) {
359             LOG(ERROR, VERIFIER) << "Register index out of bounds: 0x" << std::hex
360                                  << reg_idx << ", Max allowed: 0x" << std::hex << valid_regs_num;
361             return false;
362         }
363     }
364     return true;
365 }
366 
VerifyMethodId(const uint32_t & method_id) const367 bool Verifier::VerifyMethodId(const uint32_t &method_id) const
368 {
369     auto iter = std::find(constant_pool_ids_.begin(), constant_pool_ids_.end(), method_id);
370     if (iter == constant_pool_ids_.end() ||
371         (std::find(literal_ids_.begin(), literal_ids_.end(), method_id) != literal_ids_.end()) ||
372         ins_string_ids_.count(method_id)) {
373         LOG(ERROR, VERIFIER) << "Fail to verify method id. method_id(0x" << std::hex << method_id << ")!";
374         return false;
375     }
376     return true;
377 }
378 
VerifyLiteralId(const uint32_t & literal_id) const379 bool Verifier::VerifyLiteralId(const uint32_t &literal_id) const
380 {
381     auto iter = std::find(constant_pool_ids_.begin(), constant_pool_ids_.end(), literal_id);
382     if (iter == constant_pool_ids_.end() ||
383         (std::find(all_method_ids_.begin(), all_method_ids_.end(), literal_id) != all_method_ids_.end()) ||
384         ins_string_ids_.count(literal_id)) {
385         LOG(ERROR, VERIFIER) << "Fail to verify literal id. literal_id(0x" << std::hex << literal_id << ")!";
386         return false;
387     }
388     return true;
389 }
390 
VerifyStringId(const uint32_t & string_id) const391 bool Verifier::VerifyStringId(const uint32_t &string_id) const
392 {
393     auto iter = std::find(constant_pool_ids_.begin(), constant_pool_ids_.end(), string_id);
394     if (iter == constant_pool_ids_.end() ||
395         ins_method_ids_.count(string_id) ||
396         (std::find(literal_ids_.begin(), literal_ids_.end(), string_id) != literal_ids_.end())) {
397         LOG(ERROR, VERIFIER) << "Fail to verify string id. string_id(0x" << std::hex << string_id << ")!";
398         return false;
399     }
400     return true;
401 }
402 
GetFirstImmFromInstruction(const BytecodeInstruction & bc_ins)403 std::optional<int64_t> Verifier::GetFirstImmFromInstruction(const BytecodeInstruction &bc_ins)
404 {
405     std::optional<int64_t> first_imm = std::optional<int64_t> {};
406     size_t index = 0;
407     const auto format = bc_ins.GetFormat();
408     if (bc_ins.HasImm(format, index)) {
409         first_imm = bc_ins.GetImm64(index);
410     }
411 
412     return first_imm;
413 }
414 
GetSlotNumberFromAnnotation(panda_file::MethodDataAccessor & method_accessor)415 std::optional<uint64_t> Verifier::GetSlotNumberFromAnnotation(panda_file::MethodDataAccessor &method_accessor)
416 {
417     std::optional<uint64_t> slot_number {};
418     method_accessor.EnumerateAnnotations([&](panda_file::File::EntityId annotation_id) {
419         panda_file::AnnotationDataAccessor ada(*file_, annotation_id);
420         auto *annotation_name = reinterpret_cast<const char *>(file_->GetStringData(ada.GetClassId()).data);
421         if (::strcmp("L_ESSlotNumberAnnotation;", annotation_name) == 0) {
422             uint32_t elem_count = ada.GetCount();
423             for (uint32_t i = 0; i < elem_count; i++) {
424                 panda_file::AnnotationDataAccessor::Elem adae = ada.GetElement(i);
425                 auto *elem_name = reinterpret_cast<const char *>(file_->GetStringData(adae.GetNameId()).data);
426                 if (::strcmp("SlotNumber", elem_name) == 0) {
427                     slot_number = adae.GetScalarValue().GetValue();
428                 }
429             }
430         }
431     });
432     return slot_number;
433 }
434 
VerifyMethodIdInLiteralArray(const uint32_t & id)435 bool Verifier::VerifyMethodIdInLiteralArray(const uint32_t &id)
436 {
437     const auto method_id = panda_file::File::EntityId(id).GetOffset();
438     auto iter = std::find(all_method_ids_.begin(), all_method_ids_.end(), method_id);
439     if (iter == all_method_ids_.end()) {
440         LOG(ERROR, VERIFIER) << "Invalid method id(0x" << id << ") in literal array";
441         return false;
442     }
443     return true;
444 }
445 
VerifyStringIdInLiteralArray(const uint32_t & id)446 bool Verifier::VerifyStringIdInLiteralArray(const uint32_t &id)
447 {
448     auto string_data = file_->GetStringData(panda_file::File::EntityId(id));
449     if (string_data.data == nullptr) {
450         LOG(ERROR, VERIFIER) << "Invalid string_id. string_id(0x" << std::hex << id << ")!";
451         return false;
452     }
453     auto desc = std::string(utf::Mutf8AsCString(string_data.data));
454     std::wstring_convert<std::codecvt_utf8_utf16<wchar_t>> converter;
455     std::wstring utf16_desc = converter.from_bytes(desc);
456     if (string_data.utf16_length != utf16_desc.length()) {
457         LOG(ERROR, VERIFIER) << "Invalid string value(0x" << id << ") in literal array";
458         return false;
459     }
460     return true;
461 }
462 
VerifyLiteralIdInLiteralArray(const uint32_t & id)463 bool Verifier::VerifyLiteralIdInLiteralArray(const uint32_t &id)
464 {
465     auto iter = std::find(literal_ids_.begin(), literal_ids_.end(), id);
466     if (iter == literal_ids_.end()) {
467         LOG(ERROR, VERIFIER) << "Invalid literal id(0x" << id << ") in literal array";
468         return false;
469     }
470     return true;
471 }
472 
VerifySingleLiteralArray(const panda_file::File::EntityId & literal_id)473 bool Verifier::VerifySingleLiteralArray(const panda_file::File::EntityId &literal_id)
474 {
475     auto sp = file_->GetSpanFromId(literal_id);
476     const auto literal_vals_num = panda_file::helpers::Read<sizeof(uint32_t)>(&sp);
477     for (size_t i = 0; i < literal_vals_num; i += 2U) { // 2u skip literal item
478         const auto tag = static_cast<panda_file::LiteralTag>(panda_file::helpers::Read<panda_file::TAG_SIZE>(&sp));
479         switch (tag) {
480             case panda_file::LiteralTag::TAGVALUE:
481             case panda_file::LiteralTag::BOOL:
482             case panda_file::LiteralTag::ACCESSOR:
483             case panda_file::LiteralTag::NULLVALUE:
484             case panda_file::LiteralTag::BUILTINTYPEINDEX: {
485                 sp = sp.SubSpan(sizeof(uint8_t)); // run next sp
486                 break;
487             }
488             case panda_file::LiteralTag::METHODAFFILIATE: {
489                 sp = sp.SubSpan(sizeof(uint16_t));
490                 break;
491             }
492             case panda_file::LiteralTag::INTEGER:
493             case panda_file::LiteralTag::FLOAT:
494             case panda_file::LiteralTag::GETTER:
495             case panda_file::LiteralTag::SETTER:
496             case panda_file::LiteralTag::GENERATORMETHOD:
497             case panda_file::LiteralTag::LITERALBUFFERINDEX:
498             case panda_file::LiteralTag::ASYNCGENERATORMETHOD: {
499                 sp = sp.SubSpan(sizeof(uint32_t));
500                 break;
501             }
502             case panda_file::LiteralTag::DOUBLE: {
503                 const auto value = bit_cast<double>(panda_file::helpers::Read<sizeof(uint64_t)>(&sp));
504                 // true: High 16-bit of double value >= 0xffff
505                 if (IsImpureNaN(value)) {
506                     LOG(ERROR, VERIFIER) << "Fail to verify double value " << value << " in literal array";
507                     return false;
508                 }
509                 break;
510             }
511             case panda_file::LiteralTag::ARRAY_U1:
512             case panda_file::LiteralTag::ARRAY_U8:
513             case panda_file::LiteralTag::ARRAY_I8:
514             case panda_file::LiteralTag::ARRAY_U16:
515             case panda_file::LiteralTag::ARRAY_I16:
516             case panda_file::LiteralTag::ARRAY_U32:
517             case panda_file::LiteralTag::ARRAY_I32:
518             case panda_file::LiteralTag::ARRAY_U64:
519             case panda_file::LiteralTag::ARRAY_I64:
520             case panda_file::LiteralTag::ARRAY_F32:
521             case panda_file::LiteralTag::ARRAY_F64:
522             case panda_file::LiteralTag::ARRAY_STRING: {
523                 i = literal_vals_num;
524                 break;
525             }
526             case panda_file::LiteralTag::STRING: {
527                 panda_file::helpers::Read<sizeof(uint32_t)>(&sp);
528                 break;
529             }
530             case panda_file::LiteralTag::METHOD: {
531                 const auto value = static_cast<uint32_t>(panda_file::helpers::Read<sizeof(uint32_t)>(&sp));
532                 inner_method_map_.emplace(literal_id.GetOffset(), value);
533                 if (!VerifyMethodIdInLiteralArray(value)) {
534                     return false;
535                 }
536                 break;
537             }
538             case panda_file::LiteralTag::LITERALARRAY: {
539                 const auto value = static_cast<uint32_t>(panda_file::helpers::Read<sizeof(uint32_t)>(&sp));
540                 inner_literal_map_.emplace(literal_id.GetOffset(), value);
541                 if (!VerifyLiteralIdInLiteralArray(value)) {
542                     return false;
543                 }
544                 break;
545             }
546             default: {
547                 LOG(ERROR, VERIFIER) << "Invalid literal tag";
548                 return false;
549             }
550         }
551     }
552     return true;
553 }
554 
IsModuleLiteralId(const panda_file::File::EntityId & id) const555 bool Verifier::IsModuleLiteralId(const panda_file::File::EntityId &id) const
556 {
557     return module_literals_.find(id.GetOffset()) != module_literals_.end();
558 }
559 
VerifyLiteralArrays()560 bool Verifier::VerifyLiteralArrays()
561 {
562     for (const auto &arg_literal_id : literal_ids_) {
563         const auto literal_id = panda_file::File::EntityId(arg_literal_id);
564         if (!IsModuleLiteralId(literal_id) && !VerifySingleLiteralArray(literal_id)) {
565             return false;
566         }
567     }
568     return true;
569 }
570 
PrecomputeInstructionIndices(const BytecodeInstruction & bc_ins_start,const BytecodeInstruction & bc_ins_last)571 bool Verifier::PrecomputeInstructionIndices(const BytecodeInstruction &bc_ins_start,
572                                             const BytecodeInstruction &bc_ins_last)
573 {
574     instruction_index_map_.clear();
575     size_t index = 0;
576     auto current_ins = bc_ins_start;
577     instruction_index_map_[current_ins.GetAddress()] = index;
578 
579     while (current_ins.GetAddress() < bc_ins_last.GetAddress()) {
580         //Must keep IsPrimaryOpcodeValid is the first check item
581         if (!current_ins.IsPrimaryOpcodeValid()) {
582             LOG(ERROR, VERIFIER) << "Fail to verify primary opcode!";
583             return false;
584         }
585         current_ins = current_ins.GetNext();
586         index++;
587         instruction_index_map_[current_ins.GetAddress()] = index;
588     }
589     return true;
590 }
591 
IsMethodBytecodeInstruction(const BytecodeInstruction & bc_ins_cur)592 bool Verifier::IsMethodBytecodeInstruction(const BytecodeInstruction &bc_ins_cur)
593 {
594     if (instruction_index_map_.find(bc_ins_cur.GetAddress()) != instruction_index_map_.end()) {
595         return true;
596     }
597     return false;
598 }
599 
VerifyJumpInstruction(const BytecodeInstruction & bc_ins,const BytecodeInstruction & bc_ins_last,const BytecodeInstruction & bc_ins_first,const uint8_t * ins_arr,panda_file::File::EntityId code_id)600 bool Verifier::VerifyJumpInstruction(const BytecodeInstruction &bc_ins, const BytecodeInstruction &bc_ins_last,
601                                      const BytecodeInstruction &bc_ins_first, const uint8_t *ins_arr,
602                                      panda_file::File::EntityId code_id)
603 {
604     // update maximum forward offset
605     const auto bc_ins_forward_size = bc_ins_last.GetAddress() - bc_ins.GetAddress();
606     // update maximum backward offset
607     const auto bc_ins_backward_size = bc_ins.GetAddress() - bc_ins_first.GetAddress();
608 
609     if (bc_ins.IsJumpInstruction()) {
610         std::optional<int64_t> immdata = GetFirstImmFromInstruction(bc_ins);
611         if (!immdata.has_value()) {
612             LOG(ERROR, VERIFIER) << "Fail to get immediate data!";
613             return false;
614         }
615         if ((immdata.value() > 0) && (immdata.value() >= bc_ins_forward_size)) {
616             LOG(ERROR, VERIFIER) << "Jump forward out of boundary";
617             return false;
618         }
619         if ((immdata.value() < 0) && (bc_ins_backward_size + immdata.value() < 0)) {
620             LOG(ERROR, VERIFIER) << "Jump backward out of boundary";
621             return false;
622         }
623 
624         const auto bc_ins_dest = bc_ins.JumpTo(immdata.value());
625         if (!bc_ins_dest.IsPrimaryOpcodeValid()) {
626             LOG(ERROR, VERIFIER) << "Fail to verify target jump primary opcode!";
627             return false;
628         }
629         if (!IsMethodBytecodeInstruction(bc_ins_dest)) {
630             LOG(ERROR, VERIFIER) << "> error encountered at " << code_id << " (0x" << std::hex << code_id
631                                  << "). incorrect instruction at offset: 0x" << (bc_ins.GetAddress() - ins_arr)
632                                  << ": invalid jump offset 0x" << immdata.value()
633                                  << " - jumping in the middle of another instruction!";
634             return false;
635         }
636     }
637 
638     return true;
639 }
640 
GetIcSlotFromInstruction(const BytecodeInstruction & bc_ins,uint32_t & first_slot_index,bool & has_slot,bool & is_two_slot)641 bool Verifier::GetIcSlotFromInstruction(const BytecodeInstruction &bc_ins, uint32_t &first_slot_index,
642                                         bool &has_slot, bool &is_two_slot)
643 {
644     std::optional<uint64_t> first_imm = {};
645     if (bc_ins.HasFlag(BytecodeInstruction::Flags::ONE_SLOT)) {
646         first_imm = GetFirstImmFromInstruction(bc_ins);
647         if (!first_imm.has_value()) {
648             LOG(ERROR, VERIFIER) << "Fail to get first immediate data!";
649             return false;
650         }
651         first_slot_index = first_imm.value();
652         is_two_slot = false;
653         has_slot = true;
654     } else if (bc_ins.HasFlag(BytecodeInstruction::Flags::TWO_SLOT)) {
655         first_imm = GetFirstImmFromInstruction(bc_ins);
656         if (!first_imm.has_value()) {
657             LOG(ERROR, VERIFIER) << "Fail to get first immediate data!";
658             return false;
659         }
660         first_slot_index = first_imm.value();
661         has_slot = true;
662         is_two_slot = true;
663     }
664 
665     return true;
666 }
667 
VerifyCatchBlocks(panda_file::CodeDataAccessor::TryBlock & try_block,const BytecodeInstruction & bc_ins,const BytecodeInstruction & bc_ins_last)668 bool Verifier::VerifyCatchBlocks(panda_file::CodeDataAccessor::TryBlock &try_block, const BytecodeInstruction &bc_ins,
669                                  const BytecodeInstruction &bc_ins_last)
670 {
671     bool result = true;
672 
673     try_block.EnumerateCatchBlocks([&](panda_file::CodeDataAccessor::CatchBlock &catch_block) {
674         const auto handler_begin_offset = catch_block.GetHandlerPc();
675         // GetCodeSize() returns a unsigned long value, which is always >= 0,
676         // so handler_end_offset is guaranteed to be >= handler_begin_offset
677         const auto handler_end_offset = handler_begin_offset + catch_block.GetCodeSize();
678 
679         const auto handler_begin_bc_ins = bc_ins.JumpTo(handler_begin_offset);
680         const auto handler_end_bc_ins = bc_ins.JumpTo(handler_end_offset);
681 
682         const bool handler_begin_offset_in_range = bc_ins_last.GetAddress() > handler_begin_bc_ins.GetAddress();
683         const bool handler_end_offset_in_range = bc_ins_last.GetAddress() >= handler_end_bc_ins.GetAddress();
684 
685         if (!handler_begin_offset_in_range) {
686             LOG(ERROR, VERIFIER) << "> Invalid catch block begin offset range! address is: 0x" << std::hex
687                                  << handler_begin_bc_ins.GetAddress();
688             result = false;
689             return false;
690         }
691         if (!IsMethodBytecodeInstruction(handler_begin_bc_ins)) {
692             LOG(ERROR, VERIFIER) << "> Invalid catch block begin offset validity! address is: 0x" << std::hex
693                                  << handler_begin_bc_ins.GetAddress();
694             result = false;
695             return false;
696         }
697         if (!handler_end_offset_in_range) {
698             LOG(ERROR, VERIFIER) << "> Invalid catch block end offset range! address is: 0x" << std::hex
699                                  << handler_end_bc_ins.GetAddress();
700             result = false;
701             return false;
702         }
703         if (!IsMethodBytecodeInstruction(handler_end_bc_ins)) {
704             LOG(ERROR, VERIFIER) << "> Invalid catch block end offset validity! address is: 0x" << std::hex
705                                  << handler_end_bc_ins.GetAddress();
706             result = false;
707             return false;
708         }
709 
710         return true;
711     });
712 
713     return result;
714 }
715 
VerifyTryBlocks(panda_file::CodeDataAccessor & code_accessor,const BytecodeInstruction & bc_ins,const BytecodeInstruction & bc_ins_last)716 bool Verifier::VerifyTryBlocks(panda_file::CodeDataAccessor &code_accessor, const BytecodeInstruction &bc_ins,
717                                const BytecodeInstruction &bc_ins_last)
718 {
719     bool result = true;
720 
721     code_accessor.EnumerateTryBlocks([&](panda_file::CodeDataAccessor::TryBlock &try_block) {
722         const auto try_begin_bc_ins = bc_ins.JumpTo(try_block.GetStartPc());
723         // GetLength() returns a uint32 value, which is always >= 0,
724         // so try_end_bc_ins is guaranteed to be >= try_begin_bc_ins
725         const auto try_end_bc_ins = bc_ins.JumpTo(try_block.GetStartPc() + try_block.GetLength());
726 
727         const bool try_begin_offset_in_range = bc_ins_last.GetAddress() > try_begin_bc_ins.GetAddress();
728         const bool try_end_offset_in_range = bc_ins_last.GetAddress() >= try_end_bc_ins.GetAddress();
729 
730         if (!try_begin_offset_in_range) {
731             LOG(ERROR, VERIFIER) << "> Invalid try block begin offset range! address is: 0x" << std::hex
732                                  << try_begin_bc_ins.GetAddress();
733             result = false;
734             return false;
735         }
736         if (!IsMethodBytecodeInstruction(try_begin_bc_ins)) {
737             LOG(ERROR, VERIFIER) << "> Invalid try block begin offset validity! address is: 0x" << std::hex
738                                  << try_begin_bc_ins.GetAddress();
739             result = false;
740             return false;
741         }
742         if (!try_end_offset_in_range) {
743             LOG(ERROR, VERIFIER) << "> Invalid try block end offset range! address is: 0x" << std::hex
744                                  << try_end_bc_ins.GetAddress();
745             result = false;
746             return false;
747         }
748         if (!IsMethodBytecodeInstruction(try_end_bc_ins)) {
749             LOG(ERROR, VERIFIER) << "> Invalid try block end offset validity! address is: 0x" << std::hex
750                                  << try_end_bc_ins.GetAddress();
751             result = false;
752             return false;
753         }
754         if (!VerifyCatchBlocks(try_block, bc_ins, bc_ins_last)) {
755             LOG(ERROR, VERIFIER) << "Catch block validation failed!";
756             result = false;
757             return false;
758         }
759 
760         return true;
761     });
762 
763     return result;
764 }
765 
766 
VerifySlotNumber(panda_file::MethodDataAccessor & method_accessor,const uint32_t & slot_number,const panda_file::File::EntityId & method_id)767 bool Verifier::VerifySlotNumber(panda_file::MethodDataAccessor &method_accessor, const uint32_t &slot_number,
768                                 const panda_file::File::EntityId &method_id)
769 {
770     const auto ann_slot_number = GetSlotNumberFromAnnotation(method_accessor);
771     if (!ann_slot_number.has_value()) {
772         LOG(INFO, VERIFIER) << "There is no slot number information in annotaion.";
773         // To be compatible with old abc, slot number verification is not continued
774         return true;
775     }
776     if (slot_number == ann_slot_number.value()) {
777         return true;
778     }
779 
780     LOG(ERROR, VERIFIER) << "Slot number has been falsified in method 0x" << method_id;
781     return false;
782 }
783 
VerifyMethodRegisterIndex(panda_file::CodeDataAccessor & code_accessor,std::optional<uint64_t> & valid_regs_num)784 bool Verifier::VerifyMethodRegisterIndex(panda_file::CodeDataAccessor &code_accessor,
785                                          std::optional<uint64_t> &valid_regs_num)
786 {
787     const uint64_t reg_nums = code_accessor.GetNumVregs();
788     const uint64_t arg_nums = code_accessor.GetNumArgs();
789     valid_regs_num = SafeAdd(reg_nums, arg_nums);
790     if (!valid_regs_num.has_value()) {
791         LOG(ERROR, VERIFIER) << "Integer overflow detected during register index calculation!";
792         return false;
793     }
794     if (valid_regs_num.value() > MAX_REGISTER_INDEX + 1) {
795         LOG(ERROR, VERIFIER) << "Register index exceeds the maximum allowable value (0xffff)!";
796         return false;
797     }
798     return true;
799 }
800 
VerifyMethodInstructions(const MethodInfos & infos)801 bool Verifier::VerifyMethodInstructions(const MethodInfos &infos)
802 {
803     auto current_ins = infos.bc_ins;
804     auto last_ins = infos.bc_ins_last;
805     auto code_id = infos.method_accessor.GetCodeId().value();
806     auto method_id = infos.method_id;
807     auto valid_regs_num = infos.valid_regs_num.value();
808     auto ins_slot_num = infos.ins_slot_num;
809     auto has_slot = infos.has_slot;
810     auto is_two_slot = infos.is_two_slot;
811 
812     while (current_ins.GetAddress() != last_ins.GetAddress()) {
813         if (current_ins.GetAddress() > last_ins.GetAddress()) {
814             LOG(ERROR, VERIFIER) << "> error encountered at " << code_id
815                                  << " (0x" << std::hex << code_id
816                                  << "). bytecode instructions sequence corrupted for method "
817                                  << method_id
818                                  << "! went out of bounds";
819             return false;
820         }
821         if (!current_ins.IsJumpInstruction() && !current_ins.IsReturnOrThrowInstruction()
822             && current_ins.GetNext().GetAddress() == last_ins.GetAddress()) {
823             LOG(ERROR, VERIFIER) << "> error encountered at " << code_id
824                                  << " (0x" << std::hex << code_id
825                                  << "). bytecode instructions sequence corrupted for method "
826                                  << method_id
827                                  << "! went out of bounds";
828             return false;
829         }
830         const size_t count = GetVRegCount(current_ins);
831         if (count != 0 && !CheckVRegIdx(current_ins, count, valid_regs_num)) {
832             return false;
833         }
834         if (!VerifyJumpInstruction(current_ins, last_ins,
835                                    infos.bc_ins_init, infos.ins_arr,
836                                    code_id)) {
837             LOG(ERROR, VERIFIER) << "Invalid target position of jump instruction";
838             return false;
839         }
840         if (!GetIcSlotFromInstruction(current_ins, ins_slot_num,
841                                       has_slot, is_two_slot)) {
842             LOG(ERROR, VERIFIER) << "Fail to get first slot index!";
843             return false;
844         }
845         current_ins = current_ins.GetNext();
846     }
847     return true;
848 }
849 
CheckConstantPoolMethodContent(const panda_file::File::EntityId & method_id)850 bool Verifier::CheckConstantPoolMethodContent(const panda_file::File::EntityId &method_id)
851 {
852     panda_file::MethodDataAccessor method_accessor(*file_, method_id);
853     if (!method_accessor.GetCodeId().has_value()) {
854         LOG(ERROR, VERIFIER) << "Fail to get code id!";
855         return false;
856     }
857     panda_file::CodeDataAccessor code_accessor(*file_, method_accessor.GetCodeId().value());
858     const auto ins_size = code_accessor.GetCodeSize();
859     const auto ins_arr = code_accessor.GetInstructions();
860     auto bc_ins = BytecodeInstruction(ins_arr);
861     const auto bc_ins_last = bc_ins.JumpTo(ins_size);
862     const auto bc_ins_init = bc_ins; // initial PC value
863     uint32_t ins_slot_num = 0; // For ic slot index verification
864     bool has_slot = false;
865     bool is_two_slot = false;
866     std::optional<uint64_t> valid_regs_num = 0;
867     MethodInfos infos = {bc_ins_init, bc_ins, bc_ins_last, method_accessor, method_id,
868                          valid_regs_num, ins_arr, ins_slot_num, has_slot, is_two_slot};
869     if (ins_size <= 0) {
870         LOG(ERROR, VERIFIER) << "Fail to verify code size!";
871         return false;
872     }
873     if (!VerifyMethodRegisterIndex(code_accessor, valid_regs_num)) {
874         LOG(ERROR, VERIFIER) << "Fail to verify method register index!";
875         return false;
876     }
877     if (!PrecomputeInstructionIndices(bc_ins, bc_ins_last)) {
878         LOG(ERROR, VERIFIER) << "Fail to precompute instruction indices!";
879         return false;
880     }
881     if (!IsMethodBytecodeInstruction(bc_ins)) {
882         LOG(ERROR, VERIFIER) << "Fail to verify method first bytecode instruction!";
883     }
884     if (!VerifyTryBlocks(code_accessor, bc_ins, bc_ins_last)) {
885         LOG(ERROR, VERIFIER) << "Fail to verify try blocks or catch blocks!";
886         return false;
887     }
888     if (!VerifyMethodInstructions(infos)) {
889         LOG(ERROR, VERIFIER) << "Fail to verify method instructions!";
890         return false;
891     }
892     if (has_slot) {
893         if (is_two_slot) {
894             ins_slot_num += 1; // when there are two slots for the last instruction, the slot index increases
895         }
896         ins_slot_num += 1; // slot index starts with zero
897     }
898     return true;
899 }
900 
CheckConstantPoolIndex() const901 bool Verifier::CheckConstantPoolIndex() const
902 {
903     for (auto &id : ins_method_ids_) {
904         if (!VerifyMethodId(id)) {
905             return false;
906         }
907     }
908 
909     for (auto &id : ins_literal_ids_) {
910         if (!VerifyLiteralId(id)) {
911             return false;
912         }
913     }
914 
915     for (auto &id : ins_string_ids_) {
916         if (!VerifyStringId(id)) {
917             return false;
918         }
919     }
920 
921     return true;
922 }
923 
SafeAdd(uint64_t a,uint64_t b) const924 std::optional<uint64_t> Verifier::SafeAdd(uint64_t a, uint64_t b) const
925 {
926     if (a > std::numeric_limits<uint64_t>::max() - b) {
927         return std::nullopt;
928     }
929     return a + b;
930 }
931 } // namespace panda::verifier
932