1 /*
2 * Copyright (c) 2023 Huawei Device Co., Ltd.
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at
6 *
7 * http://www.apache.org/licenses/LICENSE-2.0
8 *
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
14 */
#include <codecvt>
#include <cstdint>
#include <locale>

#include "verifier.h"
#include "class_data_accessor-inl.h"
#include "libpandafile/util/collect_util.h"
#include "zlib.h"
22
23 namespace panda::verifier {
24
Verifier(const std::string & filename)25 Verifier::Verifier(const std::string &filename)
26 {
27 auto file_to_verify = panda_file::File::Open(filename);
28 file_.swap(file_to_verify);
29 }
30
Verify()31 bool Verifier::Verify()
32 {
33 if (!VerifyChecksum()) {
34 return false;
35 }
36
37 if (!CollectIdInfos()) {
38 return false;
39 }
40
41 if (!VerifyConstantPool()) {
42 return false;
43 }
44
45 return true;
46 }
47
CollectIdInfos()48 bool Verifier::CollectIdInfos()
49 {
50 if (file_ == nullptr) {
51 LOG(ERROR, VERIFIER) << "Failed to verify empty abc file!";
52 return false;
53 }
54 GetConstantPoolIds();
55 if (include_literal_array_ids) {
56 GetLiteralIds();
57 }
58 return CheckConstantPool(verifier::ActionType::COLLECTINFOS);
59 }
60
VerifyChecksum()61 bool Verifier::VerifyChecksum()
62 {
63 if (file_ == nullptr) {
64 LOG(ERROR, VERIFIER) << "Failed to verify empty abc file!";
65 return false;
66 }
67 uint32_t file_size = file_->GetHeader()->file_size;
68 ASSERT(file_size > FILE_CONTENT_OFFSET);
69 uint32_t cal_checksum = adler32(1, file_->GetBase() + FILE_CONTENT_OFFSET, file_size - FILE_CONTENT_OFFSET);
70 return file_->GetHeader()->checksum == cal_checksum;
71 }
72
VerifyConstantPool()73 bool Verifier::VerifyConstantPool()
74 {
75 if (file_ == nullptr) {
76 LOG(ERROR, VERIFIER) << "Failed to verify empty abc file!";
77 return false;
78 }
79
80 if (!CheckConstantPoolIndex()) {
81 return false;
82 }
83
84 if (!CheckConstantPool(verifier::ActionType::CHECKCONSTPOOLCONTENT)) {
85 return false;
86 }
87
88 if (!VerifyLiteralArrays()) {
89 return false;
90 }
91
92 return true;
93 }
94
VerifyRegisterIndex()95 bool Verifier::VerifyRegisterIndex()
96 {
97 if (file_ == nullptr) {
98 LOG(ERROR, VERIFIER) << "Failed to verify empty abc file!";
99 return false;
100 }
101
102 for (const auto id : all_method_ids_) {
103 const panda_file::File::EntityId method_id = panda_file::File::EntityId(id);
104 panda_file::MethodDataAccessor method_accessor {*file_, method_id};
105 if (!method_accessor.GetCodeId().has_value()) {
106 continue;
107 }
108 panda_file::CodeDataAccessor code_data(*file_, method_accessor.GetCodeId().value());
109 const uint64_t reg_nums = code_data.GetNumVregs();
110 const uint64_t arg_nums = code_data.GetNumArgs();
111 const std::optional<uint64_t> valid_regs_num = SafeAdd(reg_nums, arg_nums);
112 if (!valid_regs_num.has_value()) {
113 LOG(ERROR, VERIFIER) << "Integer overflow detected during register index calculation!";
114 return false;
115 }
116 if (valid_regs_num.value() > MAX_REGISTER_INDEX + 1) {
117 LOG(ERROR, VERIFIER) << "Register index exceeds the maximum allowable value (0xffff)!";
118 return false;
119 }
120 auto bc_ins = BytecodeInstruction(code_data.GetInstructions());
121 const auto bc_ins_last = bc_ins.JumpTo(code_data.GetCodeSize());
122 ASSERT(arg_nums >= DEFAULT_ARGUMENT_NUMBER);
123 while (bc_ins.GetAddress() < bc_ins_last.GetAddress()) {
124 const size_t count = GetVRegCount(bc_ins);
125 if (count == 0) { // Skip instructions that do not use registers
126 bc_ins = bc_ins.GetNext();
127 continue;
128 }
129 if (!CheckVRegIdx(bc_ins, count, valid_regs_num.value())) {
130 return false;
131 }
132 bc_ins = bc_ins.GetNext();
133 }
134 }
135 return true;
136 }
137
VerifyConstantPoolIndex()138 bool Verifier::VerifyConstantPoolIndex()
139 {
140 if (file_ == nullptr) {
141 LOG(ERROR, VERIFIER) << "Failed to verify empty abc file!";
142 return false;
143 }
144
145 if (!CheckConstantPoolIndex()) {
146 return false;
147 }
148
149 return true;
150 }
151
VerifyConstantPoolContent()152 bool Verifier::VerifyConstantPoolContent()
153 {
154 if (file_ == nullptr) {
155 LOG(ERROR, VERIFIER) << "Failed to verify empty abc file!";
156 return false;
157 }
158
159 if (!CheckConstantPool(verifier::ActionType::CHECKCONSTPOOLCONTENT)) {
160 return false;
161 }
162
163 if (!VerifyLiteralArrays()) {
164 return false;
165 }
166
167 return true;
168 }
169
GetConstantPoolIds()170 void Verifier::GetConstantPoolIds()
171 {
172 if (constant_pool_ids_.size() != 0) {
173 return;
174 }
175 auto index_headers = file_->GetIndexHeaders();
176 for (const auto &index_header : index_headers) {
177 auto region_indexs = file_->GetMethodIndex(&index_header);
178 for (auto &index : region_indexs) {
179 constant_pool_ids_.push_back(index.GetOffset());
180 }
181 }
182 }
183
GetLiteralIds()184 void Verifier::GetLiteralIds()
185 {
186 if (literal_ids_.size() != 0) {
187 return;
188 }
189
190 if (panda_file::ContainsLiteralArrayInHeader(file_->GetHeader()->version)) {
191 const auto literal_arrays = file_->GetLiteralArrays();
192 PushToLiteralIds(literal_arrays);
193 } else {
194 panda::libpandafile::CollectUtil collect_util;
195 std::unordered_set<uint32_t> literal_array_ids;
196 collect_util.CollectLiteralArray(*file_, literal_array_ids);
197 PushToLiteralIds(literal_array_ids);
198 }
199 }
200
201 template <typename T>
PushToLiteralIds(T & ids)202 void Verifier::PushToLiteralIds(T &ids)
203 {
204 for (const auto id : ids) {
205 literal_ids_.push_back(id);
206 }
207 }
208
CheckConstantPoolActions(const verifier::ActionType type,panda_file::File::EntityId method_id)209 bool Verifier::CheckConstantPoolActions(const verifier::ActionType type, panda_file::File::EntityId method_id)
210 {
211 switch (type) {
212 case verifier::ActionType::CHECKCONSTPOOLCONTENT: {
213 return CheckConstantPoolMethodContent(method_id);
214 }
215 case verifier::ActionType::COLLECTINFOS: {
216 all_method_ids_.push_back(method_id.GetOffset());
217 return CollectIdInInstructions(method_id);
218 }
219 default: {
220 return true;
221 }
222 }
223 }
224
CollectIdInInstructions(const panda_file::File::EntityId & method_id)225 bool Verifier::CollectIdInInstructions(const panda_file::File::EntityId &method_id)
226 {
227 panda_file::MethodDataAccessor method_accessor(*file_, method_id);
228 if (!method_accessor.GetCodeId().has_value()) {
229 return false;
230 }
231 panda_file::CodeDataAccessor code_accessor(*file_, method_accessor.GetCodeId().value());
232 const auto ins_size = code_accessor.GetCodeSize();
233 const auto ins_arr = code_accessor.GetInstructions();
234
235 auto bc_ins = BytecodeInstruction(ins_arr);
236 const auto bc_ins_last = bc_ins.JumpTo(ins_size);
237
238 while (bc_ins.GetAddress() < bc_ins_last.GetAddress()) {
239 if (!bc_ins.IsPrimaryOpcodeValid()) {
240 LOG(ERROR, VERIFIER) << "Fail to verify primary opcode!";
241 return false;
242 }
243 if (bc_ins.HasFlag(BytecodeInstruction::Flags::LITERALARRAY_ID)) {
244 // the idx of any instruction with a literal id is 0
245 // except defineclasswithbuffer/callruntime.definesendableclass
246 size_t idx = bc_ins.GetLiteralIndex();
247 const auto arg_literal_idx = bc_ins.GetId(idx).AsIndex();
248 const auto literal_id = file_->ResolveMethodIndex(method_id, arg_literal_idx);
249 ins_literal_ids_.insert(literal_id.GetOffset());
250 }
251 if (bc_ins.HasFlag(BytecodeInstruction::Flags::METHOD_ID)) {
252 const auto arg_method_idx = bc_ins.GetId().AsIndex();
253 const auto arg_method_id = file_->ResolveMethodIndex(method_id, arg_method_idx);
254 ins_method_ids_.insert(arg_method_id.GetOffset());
255 }
256 if (bc_ins.HasFlag(BytecodeInstruction::Flags::STRING_ID)) {
257 const auto arg_string_idx = bc_ins.GetId().AsIndex();
258 const auto string_id = file_->ResolveOffsetByIndex(method_id, arg_string_idx);
259 ins_string_ids_.insert(string_id.GetOffset());
260 }
261 bc_ins = bc_ins.GetNext();
262 }
263 return true;
264 }
265
CollectModuleLiteralId(const panda_file::File::EntityId & field_id)266 void Verifier::CollectModuleLiteralId(const panda_file::File::EntityId &field_id)
267 {
268 panda_file::FieldDataAccessor field_accessor(*file_, field_id);
269 const auto literal_id = field_accessor.GetValue<uint32_t>().value();
270 if (std::find(literal_ids_.begin(), literal_ids_.end(), literal_id) != literal_ids_.end()) {
271 module_literals_.insert(literal_id);
272 }
273 }
274
CheckConstantPool(const verifier::ActionType type)275 bool Verifier::CheckConstantPool(const verifier::ActionType type)
276 {
277 const auto class_idx = file_->GetClasses();
278 for (size_t i = 0; i < class_idx.size(); i++) {
279 uint32_t class_id = class_idx[i];
280 if (class_id > file_->GetHeader()->file_size) {
281 LOG(ERROR, VERIFIER) << "Binary file corrupted. out of bounds (0x" << std::hex
282 << file_->GetHeader()->file_size;
283 return false;
284 }
285 const panda_file::File::EntityId record_id {class_id};
286 if (!file_->IsExternal(record_id)) {
287 panda_file::ClassDataAccessor class_accessor {*file_, record_id};
288 bool check_res = true;
289 class_accessor.EnumerateMethods([&](panda_file::MethodDataAccessor &method_accessor) -> void {
290 check_res = check_res && CheckConstantPoolActions(type, method_accessor.GetMethodId());
291 });
292 if (!check_res) {
293 return false;
294 }
295 if (type == verifier::ActionType::COLLECTINFOS) {
296 class_accessor.EnumerateFields([&](panda_file::FieldDataAccessor &field_accessor) -> void {
297 CollectModuleLiteralId(field_accessor.GetFieldId());
298 });
299 }
300 }
301 }
302
303 return true;
304 }
305
GetVRegCount(const BytecodeInstruction & bc_ins)306 size_t Verifier::GetVRegCount(const BytecodeInstruction &bc_ins)
307 {
308 size_t idx = 0; // Represents the idxTH register index in an instruction
309 BytecodeInstruction::Format format = bc_ins.GetFormat();
310 while (bc_ins.HasVReg(format, idx)) {
311 idx++;
312 }
313 return idx;
314 }
315
IsRangeInstAndHasInvalidRegIdx(const BytecodeInstruction & bc_ins,const size_t count,uint64_t valid_regs_num)316 bool Verifier::IsRangeInstAndHasInvalidRegIdx(const BytecodeInstruction &bc_ins,
317 const size_t count, uint64_t valid_regs_num)
318 {
319 ASSERT(bc_ins.IsRangeInstruction());
320
321 uint64_t reg_idx = bc_ins.GetVReg(FIRST_INDEX);
322 if (IsRegIdxOutOfBounds(reg_idx, valid_regs_num)) { // for [format: +AA/+AAAA vBB vCC], vBB can be verified here
323 return true;
324 }
325
326 std::optional<uint64_t> max_ins_reg_idx_opt = bc_ins.GetRangeInsLastRegIdx();
327 if (!max_ins_reg_idx_opt.has_value()) {
328 LOG(ERROR, VERIFIER) << "Integer overflow detected during register index calculation!";
329 return true;
330 }
331
332 reg_idx = max_ins_reg_idx_opt.value();
333 if (IsRegIdxOutOfBounds(reg_idx, valid_regs_num)) {
334 return true;
335 }
336
337 return false;
338 }
339
IsRegIdxOutOfBounds(uint64_t reg_idx,uint64_t valid_regs_num)340 bool Verifier::IsRegIdxOutOfBounds(uint64_t reg_idx, uint64_t valid_regs_num)
341 {
342 if (reg_idx >= valid_regs_num) {
343 LOG(ERROR, VERIFIER) << "Register index out of bounds: 0x" << std::hex
344 << reg_idx << ", Max allowed: 0x" << std::hex << valid_regs_num;
345 return true;
346 }
347 return false;
348 }
349
CheckVRegIdx(const BytecodeInstruction & bc_ins,const size_t count,uint64_t valid_regs_num)350 bool Verifier::CheckVRegIdx(const BytecodeInstruction &bc_ins, const size_t count, uint64_t valid_regs_num)
351 {
352 if (bc_ins.IsRangeInstruction() &&
353 IsRangeInstAndHasInvalidRegIdx(bc_ins, count, valid_regs_num)) {
354 return false;
355 }
356 for (size_t idx = 0; idx < count; idx++) { // Represents the idxTH register index in an instruction
357 uint16_t reg_idx = bc_ins.GetVReg(idx);
358 if (reg_idx >= valid_regs_num) {
359 LOG(ERROR, VERIFIER) << "Register index out of bounds: 0x" << std::hex
360 << reg_idx << ", Max allowed: 0x" << std::hex << valid_regs_num;
361 return false;
362 }
363 }
364 return true;
365 }
366
VerifyMethodId(const uint32_t & method_id) const367 bool Verifier::VerifyMethodId(const uint32_t &method_id) const
368 {
369 auto iter = std::find(constant_pool_ids_.begin(), constant_pool_ids_.end(), method_id);
370 if (iter == constant_pool_ids_.end() ||
371 (std::find(literal_ids_.begin(), literal_ids_.end(), method_id) != literal_ids_.end()) ||
372 ins_string_ids_.count(method_id)) {
373 LOG(ERROR, VERIFIER) << "Fail to verify method id. method_id(0x" << std::hex << method_id << ")!";
374 return false;
375 }
376 return true;
377 }
378
VerifyLiteralId(const uint32_t & literal_id) const379 bool Verifier::VerifyLiteralId(const uint32_t &literal_id) const
380 {
381 auto iter = std::find(constant_pool_ids_.begin(), constant_pool_ids_.end(), literal_id);
382 if (iter == constant_pool_ids_.end() ||
383 (std::find(all_method_ids_.begin(), all_method_ids_.end(), literal_id) != all_method_ids_.end()) ||
384 ins_string_ids_.count(literal_id)) {
385 LOG(ERROR, VERIFIER) << "Fail to verify literal id. literal_id(0x" << std::hex << literal_id << ")!";
386 return false;
387 }
388 return true;
389 }
390
VerifyStringId(const uint32_t & string_id) const391 bool Verifier::VerifyStringId(const uint32_t &string_id) const
392 {
393 auto iter = std::find(constant_pool_ids_.begin(), constant_pool_ids_.end(), string_id);
394 if (iter == constant_pool_ids_.end() ||
395 ins_method_ids_.count(string_id) ||
396 (std::find(literal_ids_.begin(), literal_ids_.end(), string_id) != literal_ids_.end())) {
397 LOG(ERROR, VERIFIER) << "Fail to verify string id. string_id(0x" << std::hex << string_id << ")!";
398 return false;
399 }
400 return true;
401 }
402
GetFirstImmFromInstruction(const BytecodeInstruction & bc_ins)403 std::optional<int64_t> Verifier::GetFirstImmFromInstruction(const BytecodeInstruction &bc_ins)
404 {
405 std::optional<int64_t> first_imm = std::optional<int64_t> {};
406 size_t index = 0;
407 const auto format = bc_ins.GetFormat();
408 if (bc_ins.HasImm(format, index)) {
409 first_imm = bc_ins.GetImm64(index);
410 }
411
412 return first_imm;
413 }
414
GetSlotNumberFromAnnotation(panda_file::MethodDataAccessor & method_accessor)415 std::optional<uint64_t> Verifier::GetSlotNumberFromAnnotation(panda_file::MethodDataAccessor &method_accessor)
416 {
417 std::optional<uint64_t> slot_number {};
418 method_accessor.EnumerateAnnotations([&](panda_file::File::EntityId annotation_id) {
419 panda_file::AnnotationDataAccessor ada(*file_, annotation_id);
420 auto *annotation_name = reinterpret_cast<const char *>(file_->GetStringData(ada.GetClassId()).data);
421 if (::strcmp("L_ESSlotNumberAnnotation;", annotation_name) == 0) {
422 uint32_t elem_count = ada.GetCount();
423 for (uint32_t i = 0; i < elem_count; i++) {
424 panda_file::AnnotationDataAccessor::Elem adae = ada.GetElement(i);
425 auto *elem_name = reinterpret_cast<const char *>(file_->GetStringData(adae.GetNameId()).data);
426 if (::strcmp("SlotNumber", elem_name) == 0) {
427 slot_number = adae.GetScalarValue().GetValue();
428 }
429 }
430 }
431 });
432 return slot_number;
433 }
434
VerifyMethodIdInLiteralArray(const uint32_t & id)435 bool Verifier::VerifyMethodIdInLiteralArray(const uint32_t &id)
436 {
437 const auto method_id = panda_file::File::EntityId(id).GetOffset();
438 auto iter = std::find(all_method_ids_.begin(), all_method_ids_.end(), method_id);
439 if (iter == all_method_ids_.end()) {
440 LOG(ERROR, VERIFIER) << "Invalid method id(0x" << id << ") in literal array";
441 return false;
442 }
443 return true;
444 }
445
VerifyStringIdInLiteralArray(const uint32_t & id)446 bool Verifier::VerifyStringIdInLiteralArray(const uint32_t &id)
447 {
448 auto string_data = file_->GetStringData(panda_file::File::EntityId(id));
449 if (string_data.data == nullptr) {
450 LOG(ERROR, VERIFIER) << "Invalid string_id. string_id(0x" << std::hex << id << ")!";
451 return false;
452 }
453 auto desc = std::string(utf::Mutf8AsCString(string_data.data));
454 std::wstring_convert<std::codecvt_utf8_utf16<wchar_t>> converter;
455 std::wstring utf16_desc = converter.from_bytes(desc);
456 if (string_data.utf16_length != utf16_desc.length()) {
457 LOG(ERROR, VERIFIER) << "Invalid string value(0x" << id << ") in literal array";
458 return false;
459 }
460 return true;
461 }
462
VerifyLiteralIdInLiteralArray(const uint32_t & id)463 bool Verifier::VerifyLiteralIdInLiteralArray(const uint32_t &id)
464 {
465 auto iter = std::find(literal_ids_.begin(), literal_ids_.end(), id);
466 if (iter == literal_ids_.end()) {
467 LOG(ERROR, VERIFIER) << "Invalid literal id(0x" << id << ") in literal array";
468 return false;
469 }
470 return true;
471 }
472
// Walks the literal array stored at `literal_id` and validates its tagged values.
// Layout as consumed here: a 32-bit count, then a sequence of (tag, value) items where the size
// of the value depends on the tag.
// Side effects: METHOD values are recorded in inner_method_map_ and LITERALARRAY values in
// inner_literal_map_, both keyed by the offset of `literal_id`.
// Returns false for an impure-NaN double, for a METHOD/LITERALARRAY value that is not a known
// id, or for an unrecognised tag; returns true otherwise.
bool Verifier::VerifySingleLiteralArray(const panda_file::File::EntityId &literal_id)
{
    auto sp = file_->GetSpanFromId(literal_id);
    // The leading 32-bit value bounds the loop below.
    const auto literal_vals_num = panda_file::helpers::Read<sizeof(uint32_t)>(&sp);
    // The counter advances by two for every (tag, value) item consumed.
    for (size_t i = 0; i < literal_vals_num; i += 2U) {
        const auto tag = static_cast<panda_file::LiteralTag>(panda_file::helpers::Read<panda_file::TAG_SIZE>(&sp));
        switch (tag) {
            // Tags whose value occupies one byte: skip it.
            case panda_file::LiteralTag::TAGVALUE:
            case panda_file::LiteralTag::BOOL:
            case panda_file::LiteralTag::ACCESSOR:
            case panda_file::LiteralTag::NULLVALUE:
            case panda_file::LiteralTag::BUILTINTYPEINDEX: {
                sp = sp.SubSpan(sizeof(uint8_t));  // advance past the value
                break;
            }
            // Two-byte value: skip it.
            case panda_file::LiteralTag::METHODAFFILIATE: {
                sp = sp.SubSpan(sizeof(uint16_t));
                break;
            }
            // Four-byte values that are not validated here: skip them.
            case panda_file::LiteralTag::INTEGER:
            case panda_file::LiteralTag::FLOAT:
            case panda_file::LiteralTag::GETTER:
            case panda_file::LiteralTag::SETTER:
            case panda_file::LiteralTag::GENERATORMETHOD:
            case panda_file::LiteralTag::LITERALBUFFERINDEX:
            case panda_file::LiteralTag::ASYNCGENERATORMETHOD: {
                sp = sp.SubSpan(sizeof(uint32_t));
                break;
            }
            case panda_file::LiteralTag::DOUBLE: {
                const auto value = bit_cast<double>(panda_file::helpers::Read<sizeof(uint64_t)>(&sp));
                // Per the original note: IsImpureNaN is true when the high 16 bits of the
                // double value are >= 0xffff.
                if (IsImpureNaN(value)) {
                    LOG(ERROR, VERIFIER) << "Fail to verify double value " << value << " in literal array";
                    return false;
                }
                break;
            }
            // Typed-array tags: the remaining content is not inspected; pushing the counter
            // to the bound ends the loop after this item.
            case panda_file::LiteralTag::ARRAY_U1:
            case panda_file::LiteralTag::ARRAY_U8:
            case panda_file::LiteralTag::ARRAY_I8:
            case panda_file::LiteralTag::ARRAY_U16:
            case panda_file::LiteralTag::ARRAY_I16:
            case panda_file::LiteralTag::ARRAY_U32:
            case panda_file::LiteralTag::ARRAY_I32:
            case panda_file::LiteralTag::ARRAY_U64:
            case panda_file::LiteralTag::ARRAY_I64:
            case panda_file::LiteralTag::ARRAY_F32:
            case panda_file::LiteralTag::ARRAY_F64:
            case panda_file::LiteralTag::ARRAY_STRING: {
                i = literal_vals_num;
                break;
            }
            // The 32-bit string value is consumed but not validated here.
            case panda_file::LiteralTag::STRING: {
                panda_file::helpers::Read<sizeof(uint32_t)>(&sp);
                break;
            }
            // The value is recorded first, then checked against the collected method ids.
            case panda_file::LiteralTag::METHOD: {
                const auto value = static_cast<uint32_t>(panda_file::helpers::Read<sizeof(uint32_t)>(&sp));
                inner_method_map_.emplace(literal_id.GetOffset(), value);
                if (!VerifyMethodIdInLiteralArray(value)) {
                    return false;
                }
                break;
            }
            // The value is recorded first, then checked against the known literal array ids.
            case panda_file::LiteralTag::LITERALARRAY: {
                const auto value = static_cast<uint32_t>(panda_file::helpers::Read<sizeof(uint32_t)>(&sp));
                inner_literal_map_.emplace(literal_id.GetOffset(), value);
                if (!VerifyLiteralIdInLiteralArray(value)) {
                    return false;
                }
                break;
            }
            default: {
                LOG(ERROR, VERIFIER) << "Invalid literal tag";
                return false;
            }
        }
    }
    return true;
}
554
IsModuleLiteralId(const panda_file::File::EntityId & id) const555 bool Verifier::IsModuleLiteralId(const panda_file::File::EntityId &id) const
556 {
557 return module_literals_.find(id.GetOffset()) != module_literals_.end();
558 }
559
VerifyLiteralArrays()560 bool Verifier::VerifyLiteralArrays()
561 {
562 for (const auto &arg_literal_id : literal_ids_) {
563 const auto literal_id = panda_file::File::EntityId(arg_literal_id);
564 if (!IsModuleLiteralId(literal_id) && !VerifySingleLiteralArray(literal_id)) {
565 return false;
566 }
567 }
568 return true;
569 }
570
PrecomputeInstructionIndices(const BytecodeInstruction & bc_ins_start,const BytecodeInstruction & bc_ins_last)571 bool Verifier::PrecomputeInstructionIndices(const BytecodeInstruction &bc_ins_start,
572 const BytecodeInstruction &bc_ins_last)
573 {
574 instruction_index_map_.clear();
575 size_t index = 0;
576 auto current_ins = bc_ins_start;
577 instruction_index_map_[current_ins.GetAddress()] = index;
578
579 while (current_ins.GetAddress() < bc_ins_last.GetAddress()) {
580 //Must keep IsPrimaryOpcodeValid is the first check item
581 if (!current_ins.IsPrimaryOpcodeValid()) {
582 LOG(ERROR, VERIFIER) << "Fail to verify primary opcode!";
583 return false;
584 }
585 current_ins = current_ins.GetNext();
586 index++;
587 instruction_index_map_[current_ins.GetAddress()] = index;
588 }
589 return true;
590 }
591
IsMethodBytecodeInstruction(const BytecodeInstruction & bc_ins_cur)592 bool Verifier::IsMethodBytecodeInstruction(const BytecodeInstruction &bc_ins_cur)
593 {
594 if (instruction_index_map_.find(bc_ins_cur.GetAddress()) != instruction_index_map_.end()) {
595 return true;
596 }
597 return false;
598 }
599
VerifyJumpInstruction(const BytecodeInstruction & bc_ins,const BytecodeInstruction & bc_ins_last,const BytecodeInstruction & bc_ins_first,const uint8_t * ins_arr,panda_file::File::EntityId code_id)600 bool Verifier::VerifyJumpInstruction(const BytecodeInstruction &bc_ins, const BytecodeInstruction &bc_ins_last,
601 const BytecodeInstruction &bc_ins_first, const uint8_t *ins_arr,
602 panda_file::File::EntityId code_id)
603 {
604 // update maximum forward offset
605 const auto bc_ins_forward_size = bc_ins_last.GetAddress() - bc_ins.GetAddress();
606 // update maximum backward offset
607 const auto bc_ins_backward_size = bc_ins.GetAddress() - bc_ins_first.GetAddress();
608
609 if (bc_ins.IsJumpInstruction()) {
610 std::optional<int64_t> immdata = GetFirstImmFromInstruction(bc_ins);
611 if (!immdata.has_value()) {
612 LOG(ERROR, VERIFIER) << "Fail to get immediate data!";
613 return false;
614 }
615 if ((immdata.value() > 0) && (immdata.value() >= bc_ins_forward_size)) {
616 LOG(ERROR, VERIFIER) << "Jump forward out of boundary";
617 return false;
618 }
619 if ((immdata.value() < 0) && (bc_ins_backward_size + immdata.value() < 0)) {
620 LOG(ERROR, VERIFIER) << "Jump backward out of boundary";
621 return false;
622 }
623
624 const auto bc_ins_dest = bc_ins.JumpTo(immdata.value());
625 if (!bc_ins_dest.IsPrimaryOpcodeValid()) {
626 LOG(ERROR, VERIFIER) << "Fail to verify target jump primary opcode!";
627 return false;
628 }
629 if (!IsMethodBytecodeInstruction(bc_ins_dest)) {
630 LOG(ERROR, VERIFIER) << "> error encountered at " << code_id << " (0x" << std::hex << code_id
631 << "). incorrect instruction at offset: 0x" << (bc_ins.GetAddress() - ins_arr)
632 << ": invalid jump offset 0x" << immdata.value()
633 << " - jumping in the middle of another instruction!";
634 return false;
635 }
636 }
637
638 return true;
639 }
640
GetIcSlotFromInstruction(const BytecodeInstruction & bc_ins,uint32_t & first_slot_index,bool & has_slot,bool & is_two_slot)641 bool Verifier::GetIcSlotFromInstruction(const BytecodeInstruction &bc_ins, uint32_t &first_slot_index,
642 bool &has_slot, bool &is_two_slot)
643 {
644 std::optional<uint64_t> first_imm = {};
645 if (bc_ins.HasFlag(BytecodeInstruction::Flags::ONE_SLOT)) {
646 first_imm = GetFirstImmFromInstruction(bc_ins);
647 if (!first_imm.has_value()) {
648 LOG(ERROR, VERIFIER) << "Fail to get first immediate data!";
649 return false;
650 }
651 first_slot_index = first_imm.value();
652 is_two_slot = false;
653 has_slot = true;
654 } else if (bc_ins.HasFlag(BytecodeInstruction::Flags::TWO_SLOT)) {
655 first_imm = GetFirstImmFromInstruction(bc_ins);
656 if (!first_imm.has_value()) {
657 LOG(ERROR, VERIFIER) << "Fail to get first immediate data!";
658 return false;
659 }
660 first_slot_index = first_imm.value();
661 has_slot = true;
662 is_two_slot = true;
663 }
664
665 return true;
666 }
667
VerifyCatchBlocks(panda_file::CodeDataAccessor::TryBlock & try_block,const BytecodeInstruction & bc_ins,const BytecodeInstruction & bc_ins_last)668 bool Verifier::VerifyCatchBlocks(panda_file::CodeDataAccessor::TryBlock &try_block, const BytecodeInstruction &bc_ins,
669 const BytecodeInstruction &bc_ins_last)
670 {
671 bool result = true;
672
673 try_block.EnumerateCatchBlocks([&](panda_file::CodeDataAccessor::CatchBlock &catch_block) {
674 const auto handler_begin_offset = catch_block.GetHandlerPc();
675 // GetCodeSize() returns a unsigned long value, which is always >= 0,
676 // so handler_end_offset is guaranteed to be >= handler_begin_offset
677 const auto handler_end_offset = handler_begin_offset + catch_block.GetCodeSize();
678
679 const auto handler_begin_bc_ins = bc_ins.JumpTo(handler_begin_offset);
680 const auto handler_end_bc_ins = bc_ins.JumpTo(handler_end_offset);
681
682 const bool handler_begin_offset_in_range = bc_ins_last.GetAddress() > handler_begin_bc_ins.GetAddress();
683 const bool handler_end_offset_in_range = bc_ins_last.GetAddress() >= handler_end_bc_ins.GetAddress();
684
685 if (!handler_begin_offset_in_range) {
686 LOG(ERROR, VERIFIER) << "> Invalid catch block begin offset range! address is: 0x" << std::hex
687 << handler_begin_bc_ins.GetAddress();
688 result = false;
689 return false;
690 }
691 if (!IsMethodBytecodeInstruction(handler_begin_bc_ins)) {
692 LOG(ERROR, VERIFIER) << "> Invalid catch block begin offset validity! address is: 0x" << std::hex
693 << handler_begin_bc_ins.GetAddress();
694 result = false;
695 return false;
696 }
697 if (!handler_end_offset_in_range) {
698 LOG(ERROR, VERIFIER) << "> Invalid catch block end offset range! address is: 0x" << std::hex
699 << handler_end_bc_ins.GetAddress();
700 result = false;
701 return false;
702 }
703 if (!IsMethodBytecodeInstruction(handler_end_bc_ins)) {
704 LOG(ERROR, VERIFIER) << "> Invalid catch block end offset validity! address is: 0x" << std::hex
705 << handler_end_bc_ins.GetAddress();
706 result = false;
707 return false;
708 }
709
710 return true;
711 });
712
713 return result;
714 }
715
VerifyTryBlocks(panda_file::CodeDataAccessor & code_accessor,const BytecodeInstruction & bc_ins,const BytecodeInstruction & bc_ins_last)716 bool Verifier::VerifyTryBlocks(panda_file::CodeDataAccessor &code_accessor, const BytecodeInstruction &bc_ins,
717 const BytecodeInstruction &bc_ins_last)
718 {
719 bool result = true;
720
721 code_accessor.EnumerateTryBlocks([&](panda_file::CodeDataAccessor::TryBlock &try_block) {
722 const auto try_begin_bc_ins = bc_ins.JumpTo(try_block.GetStartPc());
723 // GetLength() returns a uint32 value, which is always >= 0,
724 // so try_end_bc_ins is guaranteed to be >= try_begin_bc_ins
725 const auto try_end_bc_ins = bc_ins.JumpTo(try_block.GetStartPc() + try_block.GetLength());
726
727 const bool try_begin_offset_in_range = bc_ins_last.GetAddress() > try_begin_bc_ins.GetAddress();
728 const bool try_end_offset_in_range = bc_ins_last.GetAddress() >= try_end_bc_ins.GetAddress();
729
730 if (!try_begin_offset_in_range) {
731 LOG(ERROR, VERIFIER) << "> Invalid try block begin offset range! address is: 0x" << std::hex
732 << try_begin_bc_ins.GetAddress();
733 result = false;
734 return false;
735 }
736 if (!IsMethodBytecodeInstruction(try_begin_bc_ins)) {
737 LOG(ERROR, VERIFIER) << "> Invalid try block begin offset validity! address is: 0x" << std::hex
738 << try_begin_bc_ins.GetAddress();
739 result = false;
740 return false;
741 }
742 if (!try_end_offset_in_range) {
743 LOG(ERROR, VERIFIER) << "> Invalid try block end offset range! address is: 0x" << std::hex
744 << try_end_bc_ins.GetAddress();
745 result = false;
746 return false;
747 }
748 if (!IsMethodBytecodeInstruction(try_end_bc_ins)) {
749 LOG(ERROR, VERIFIER) << "> Invalid try block end offset validity! address is: 0x" << std::hex
750 << try_end_bc_ins.GetAddress();
751 result = false;
752 return false;
753 }
754 if (!VerifyCatchBlocks(try_block, bc_ins, bc_ins_last)) {
755 LOG(ERROR, VERIFIER) << "Catch block validation failed!";
756 result = false;
757 return false;
758 }
759
760 return true;
761 });
762
763 return result;
764 }
765
766
VerifySlotNumber(panda_file::MethodDataAccessor & method_accessor,const uint32_t & slot_number,const panda_file::File::EntityId & method_id)767 bool Verifier::VerifySlotNumber(panda_file::MethodDataAccessor &method_accessor, const uint32_t &slot_number,
768 const panda_file::File::EntityId &method_id)
769 {
770 const auto ann_slot_number = GetSlotNumberFromAnnotation(method_accessor);
771 if (!ann_slot_number.has_value()) {
772 LOG(INFO, VERIFIER) << "There is no slot number information in annotaion.";
773 // To be compatible with old abc, slot number verification is not continued
774 return true;
775 }
776 if (slot_number == ann_slot_number.value()) {
777 return true;
778 }
779
780 LOG(ERROR, VERIFIER) << "Slot number has been falsified in method 0x" << method_id;
781 return false;
782 }
783
VerifyMethodRegisterIndex(panda_file::CodeDataAccessor & code_accessor,std::optional<uint64_t> & valid_regs_num)784 bool Verifier::VerifyMethodRegisterIndex(panda_file::CodeDataAccessor &code_accessor,
785 std::optional<uint64_t> &valid_regs_num)
786 {
787 const uint64_t reg_nums = code_accessor.GetNumVregs();
788 const uint64_t arg_nums = code_accessor.GetNumArgs();
789 valid_regs_num = SafeAdd(reg_nums, arg_nums);
790 if (!valid_regs_num.has_value()) {
791 LOG(ERROR, VERIFIER) << "Integer overflow detected during register index calculation!";
792 return false;
793 }
794 if (valid_regs_num.value() > MAX_REGISTER_INDEX + 1) {
795 LOG(ERROR, VERIFIER) << "Register index exceeds the maximum allowable value (0xffff)!";
796 return false;
797 }
798 return true;
799 }
800
VerifyMethodInstructions(const MethodInfos & infos)801 bool Verifier::VerifyMethodInstructions(const MethodInfos &infos)
802 {
803 auto current_ins = infos.bc_ins;
804 auto last_ins = infos.bc_ins_last;
805 auto code_id = infos.method_accessor.GetCodeId().value();
806 auto method_id = infos.method_id;
807 auto valid_regs_num = infos.valid_regs_num.value();
808 auto ins_slot_num = infos.ins_slot_num;
809 auto has_slot = infos.has_slot;
810 auto is_two_slot = infos.is_two_slot;
811
812 while (current_ins.GetAddress() != last_ins.GetAddress()) {
813 if (current_ins.GetAddress() > last_ins.GetAddress()) {
814 LOG(ERROR, VERIFIER) << "> error encountered at " << code_id
815 << " (0x" << std::hex << code_id
816 << "). bytecode instructions sequence corrupted for method "
817 << method_id
818 << "! went out of bounds";
819 return false;
820 }
821 if (!current_ins.IsJumpInstruction() && !current_ins.IsReturnOrThrowInstruction()
822 && current_ins.GetNext().GetAddress() == last_ins.GetAddress()) {
823 LOG(ERROR, VERIFIER) << "> error encountered at " << code_id
824 << " (0x" << std::hex << code_id
825 << "). bytecode instructions sequence corrupted for method "
826 << method_id
827 << "! went out of bounds";
828 return false;
829 }
830 const size_t count = GetVRegCount(current_ins);
831 if (count != 0 && !CheckVRegIdx(current_ins, count, valid_regs_num)) {
832 return false;
833 }
834 if (!VerifyJumpInstruction(current_ins, last_ins,
835 infos.bc_ins_init, infos.ins_arr,
836 code_id)) {
837 LOG(ERROR, VERIFIER) << "Invalid target position of jump instruction";
838 return false;
839 }
840 if (!GetIcSlotFromInstruction(current_ins, ins_slot_num,
841 has_slot, is_two_slot)) {
842 LOG(ERROR, VERIFIER) << "Fail to get first slot index!";
843 return false;
844 }
845 current_ins = current_ins.GetNext();
846 }
847 return true;
848 }
849
CheckConstantPoolMethodContent(const panda_file::File::EntityId & method_id)850 bool Verifier::CheckConstantPoolMethodContent(const panda_file::File::EntityId &method_id)
851 {
852 panda_file::MethodDataAccessor method_accessor(*file_, method_id);
853 if (!method_accessor.GetCodeId().has_value()) {
854 LOG(ERROR, VERIFIER) << "Fail to get code id!";
855 return false;
856 }
857 panda_file::CodeDataAccessor code_accessor(*file_, method_accessor.GetCodeId().value());
858 const auto ins_size = code_accessor.GetCodeSize();
859 const auto ins_arr = code_accessor.GetInstructions();
860 auto bc_ins = BytecodeInstruction(ins_arr);
861 const auto bc_ins_last = bc_ins.JumpTo(ins_size);
862 const auto bc_ins_init = bc_ins; // initial PC value
863 uint32_t ins_slot_num = 0; // For ic slot index verification
864 bool has_slot = false;
865 bool is_two_slot = false;
866 std::optional<uint64_t> valid_regs_num = 0;
867 MethodInfos infos = {bc_ins_init, bc_ins, bc_ins_last, method_accessor, method_id,
868 valid_regs_num, ins_arr, ins_slot_num, has_slot, is_two_slot};
869 if (ins_size <= 0) {
870 LOG(ERROR, VERIFIER) << "Fail to verify code size!";
871 return false;
872 }
873 if (!VerifyMethodRegisterIndex(code_accessor, valid_regs_num)) {
874 LOG(ERROR, VERIFIER) << "Fail to verify method register index!";
875 return false;
876 }
877 if (!PrecomputeInstructionIndices(bc_ins, bc_ins_last)) {
878 LOG(ERROR, VERIFIER) << "Fail to precompute instruction indices!";
879 return false;
880 }
881 if (!IsMethodBytecodeInstruction(bc_ins)) {
882 LOG(ERROR, VERIFIER) << "Fail to verify method first bytecode instruction!";
883 }
884 if (!VerifyTryBlocks(code_accessor, bc_ins, bc_ins_last)) {
885 LOG(ERROR, VERIFIER) << "Fail to verify try blocks or catch blocks!";
886 return false;
887 }
888 if (!VerifyMethodInstructions(infos)) {
889 LOG(ERROR, VERIFIER) << "Fail to verify method instructions!";
890 return false;
891 }
892 if (has_slot) {
893 if (is_two_slot) {
894 ins_slot_num += 1; // when there are two slots for the last instruction, the slot index increases
895 }
896 ins_slot_num += 1; // slot index starts with zero
897 }
898 return true;
899 }
900
CheckConstantPoolIndex() const901 bool Verifier::CheckConstantPoolIndex() const
902 {
903 for (auto &id : ins_method_ids_) {
904 if (!VerifyMethodId(id)) {
905 return false;
906 }
907 }
908
909 for (auto &id : ins_literal_ids_) {
910 if (!VerifyLiteralId(id)) {
911 return false;
912 }
913 }
914
915 for (auto &id : ins_string_ids_) {
916 if (!VerifyStringId(id)) {
917 return false;
918 }
919 }
920
921 return true;
922 }
923
SafeAdd(uint64_t a,uint64_t b) const924 std::optional<uint64_t> Verifier::SafeAdd(uint64_t a, uint64_t b) const
925 {
926 if (a > std::numeric_limits<uint64_t>::max() - b) {
927 return std::nullopt;
928 }
929 return a + b;
930 }
931 } // namespace panda::verifier
932