1 /** 2 * Copyright (c) 2021-2022 Huawei Device Co., Ltd. 3 * Licensed under the Apache License, Version 2.0 (the "License"); 4 * you may not use this file except in compliance with the License. 5 * You may obtain a copy of the License at 6 * 7 * http://www.apache.org/licenses/LICENSE-2.0 8 * 9 * Unless required by applicable law or agreed to in writing, software 10 * distributed under the License is distributed on an "AS IS" BASIS, 11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 * See the License for the specific language governing permissions and 13 * limitations under the License. 14 */ 15 16 #ifndef LIBPANDAFILE_FILE_H 17 #define LIBPANDAFILE_FILE_H 18 19 #include <cstdint> 20 #include "helpers.h" 21 #include "os/mem.h" 22 #include "os/filesystem.h" 23 #include "utils/span.h" 24 #include "utils/utf.h" 25 #include "utils/logger.h" 26 27 #include <array> 28 #include <iostream> 29 #include <memory> 30 #include <string> 31 #include <string_view> 32 33 namespace panda { 34 struct EntryFileStat; 35 } // namespace panda 36 37 namespace panda::panda_file { 38 39 class PandaCache; 40 41 /* 42 * EntityPairHeader Describes pair for hash value of class's descriptor and its entity id offset, 43 * used to quickly find class by its descriptor. 44 */ 45 struct EntityPairHeader { 46 uint32_t descriptor_hash; 47 uint32_t entity_id_offset; 48 uint32_t next_pos; 49 }; 50 51 class File { 52 public: 53 using Index = uint16_t; 54 using Index32 = uint32_t; 55 56 static constexpr size_t MAGIC_SIZE = 8; 57 static constexpr size_t VERSION_SIZE = 4; 58 static const std::array<uint8_t, MAGIC_SIZE> MAGIC; 59 60 struct Header { 61 std::array<uint8_t, MAGIC_SIZE> magic; 62 uint32_t checksum; 63 std::array<uint8_t, VERSION_SIZE> version; 64 uint32_t file_size; 65 uint32_t foreign_off; 66 uint32_t foreign_size; 67 uint32_t num_classes; 68 uint32_t class_idx_off; 69 uint32_t num_lnps; 70 uint32_t lnp_idx_off; 71 uint32_t num_literalarrays; 72 uint32_t literalarray_idx_off; 73 uint32_t num_indexes; 74 uint32_t index_section_off; 75 }; 76 77 struct IndexHeader { 78 uint32_t start; 79 uint32_t end; 80 uint32_t class_idx_size; 81 uint32_t class_idx_off; 82 uint32_t method_idx_size; 83 uint32_t method_idx_off; 84 uint32_t field_idx_size; 85 uint32_t field_idx_off; 86 uint32_t proto_idx_size; 87 uint32_t proto_idx_off; 88 }; 89 90 struct StringData { StringDataStringData91 StringData(uint32_t len, const uint8_t *d) : utf16_length(len), is_ascii(false), data(d) {} 92 StringData() = default; 93 uint32_t utf16_length; // NOLINT(misc-non-private-member-variables-in-classes) 94 bool is_ascii; // NOLINT(misc-non-private-member-variables-in-classes) 95 const uint8_t *data; // NOLINT(misc-non-private-member-variables-in-classes) 96 }; 97 98 // NOLINTNEXTLINE(cppcoreguidelines-special-member-functions, hicpp-special-member-functions) 99 class EntityId { 100 public: EntityId(uint32_t offset)101 explicit constexpr EntityId(uint32_t offset) : offset_(offset) {} 102 103 EntityId() = default; 104 105 ~EntityId() = default; 106 IsValid()107 bool IsValid() const 108 { 109 return offset_ > sizeof(Header); 110 } 111 GetOffset()112 uint32_t GetOffset() const 113 { 114 return offset_; 115 } 116 GetSize()117 static constexpr size_t GetSize() 118 { 119 return sizeof(uint32_t); 120 } 121 122 friend bool operator<(const EntityId &l, const EntityId &r) 123 { 124 return l.offset_ < r.offset_; 125 } 126 127 friend bool operator==(const EntityId &l, const EntityId &r) 128 { 129 return l.offset_ == r.offset_; 130 } 131 132 friend std::ostream &operator<<(std::ostream &stream, const EntityId &id) 133 { 134 return stream << id.offset_; 135 } 136 137 private: 138 uint32_t offset_ {0}; 139 }; 140 141 enum OpenMode { READ_ONLY, READ_WRITE, WRITE_ONLY }; 142 143 StringData GetStringData(EntityId id) const; 144 EntityId GetLiteralArraysId() const; 145 146 EntityId GetClassId(const uint8_t *mutf8_name) const; 147 148 EntityId GetClassIdFromClassHashTable(const uint8_t *mutf8_name) const; 149 GetHeader()150 const Header *GetHeader() const 151 { 152 return reinterpret_cast<const Header *>(GetBase()); 153 } 154 GetBase()155 const uint8_t *GetBase() const 156 { 157 return reinterpret_cast<const uint8_t *>(base_.Get()); 158 } 159 GetPtr()160 const os::mem::ConstBytePtr &GetPtr() const 161 { 162 return base_; 163 } 164 IsExternal(EntityId id)165 bool IsExternal(EntityId id) const 166 { 167 const Header *header = GetHeader(); 168 uint32_t foreign_begin = header->foreign_off; 169 uint32_t foreign_end = foreign_begin + header->foreign_size; 170 return id.GetOffset() >= foreign_begin && id.GetOffset() < foreign_end; 171 } 172 GetIdFromPointer(const uint8_t * ptr)173 EntityId GetIdFromPointer(const uint8_t *ptr) const 174 { 175 return EntityId(ptr - GetBase()); 176 } 177 GetSpanFromId(EntityId id)178 Span<const uint8_t> GetSpanFromId(EntityId id) const 179 { 180 const Header *header = GetHeader(); 181 Span file(GetBase(), header->file_size); 182 THROW_IF(!id.IsValid() || id.GetOffset() >= file.size(), File::INVALID_FILE_OFFSET); 183 return file.Last(file.size() - id.GetOffset()); 184 } 185 GetClasses()186 Span<const uint32_t> GetClasses() const 187 { 188 const Header *header = GetHeader(); 189 Span file(GetBase(), header->file_size); 190 Span class_idx_data = file.SubSpan(header->class_idx_off, header->num_classes * sizeof(uint32_t)); 191 return Span(reinterpret_cast<const uint32_t *>(class_idx_data.data()), header->num_classes); 192 } 193 GetLiteralArrays()194 Span<const uint32_t> GetLiteralArrays() const 195 { 196 const Header *header = GetHeader(); 197 Span file(GetBase(), header->file_size); 198 Span litarr_idx_data = file.SubSpan(header->literalarray_idx_off, header->num_literalarrays * sizeof(uint32_t)); 199 return Span(reinterpret_cast<const uint32_t *>(litarr_idx_data.data()), header->num_literalarrays); 200 } 201 GetIndexHeaders()202 Span<const IndexHeader> GetIndexHeaders() const 203 { 204 const Header *header = GetHeader(); 205 Span file(GetBase(), header->file_size); 206 auto sp = file.SubSpan(header->index_section_off, header->num_indexes * sizeof(IndexHeader)); 207 return Span(reinterpret_cast<const IndexHeader *>(sp.data()), header->num_indexes); 208 } 209 GetIndexHeader(EntityId id)210 const IndexHeader *GetIndexHeader(EntityId id) const 211 { 212 if (UNLIKELY(!id.IsValid() || id.GetOffset() >= GetHeader()->file_size)) { 213 return nullptr; 214 } 215 auto headers = GetIndexHeaders(); 216 auto offset = id.GetOffset(); 217 for (const auto &header : headers) { 218 if (header.start <= offset && offset < header.end) { 219 return &header; 220 } 221 } 222 return nullptr; 223 } 224 GetClassIndex(const IndexHeader * index_header)225 Span<const EntityId> GetClassIndex(const IndexHeader *index_header) const 226 { 227 THROW_IF(index_header == nullptr, "index_header is null"); 228 auto *header = GetHeader(); 229 Span file(GetBase(), header->file_size); 230 ASSERT(index_header != nullptr); 231 auto class_idx_size = index_header->class_idx_size * EntityId::GetSize(); 232 THROW_IF(index_header->class_idx_off > header->file_size || class_idx_size > header->file_size || 233 index_header->class_idx_off > header->file_size - class_idx_size, "index_header is invalid"); 234 auto sp = file.SubSpan(index_header->class_idx_off, index_header->class_idx_size * EntityId::GetSize()); 235 return Span(reinterpret_cast<const EntityId *>(sp.data()), index_header->class_idx_size); 236 } 237 GetClassIndex(EntityId id)238 Span<const EntityId> GetClassIndex(EntityId id) const 239 { 240 auto *index_header = GetIndexHeader(id); 241 return GetClassIndex(index_header); 242 } 243 GetMethodIndex(const IndexHeader * index_header)244 Span<const EntityId> GetMethodIndex(const IndexHeader *index_header) const 245 { 246 THROW_IF(index_header == nullptr, "index_header is null"); 247 auto *header = GetHeader(); 248 Span file(GetBase(), header->file_size); 249 ASSERT(index_header != nullptr); 250 auto method_idx_size = index_header->method_idx_size * EntityId::GetSize(); 251 THROW_IF(index_header->method_idx_off > header->file_size || method_idx_size > header->file_size || 252 index_header->method_idx_off > header->file_size - method_idx_size, "index_header is invalid"); 253 auto sp = file.SubSpan(index_header->method_idx_off, index_header->method_idx_size * EntityId::GetSize()); 254 return Span(reinterpret_cast<const EntityId *>(sp.data()), index_header->method_idx_size); 255 } 256 GetMethodIndex(EntityId id)257 Span<const EntityId> GetMethodIndex(EntityId id) const 258 { 259 auto *index_header = GetIndexHeader(id); 260 return GetMethodIndex(index_header); 261 } 262 GetFieldIndex(const IndexHeader * index_header)263 Span<const EntityId> GetFieldIndex(const IndexHeader *index_header) const 264 { 265 THROW_IF(index_header == nullptr, "index_header is null"); 266 auto *header = GetHeader(); 267 Span file(GetBase(), header->file_size); 268 ASSERT(index_header != nullptr); 269 auto field_idx_size = index_header->field_idx_size * EntityId::GetSize(); 270 THROW_IF(index_header->field_idx_off > header->file_size || field_idx_size > header->file_size || 271 index_header->field_idx_off > header->file_size - field_idx_size, "index_header is invalid"); 272 auto sp = file.SubSpan(index_header->field_idx_off, index_header->field_idx_size * EntityId::GetSize()); 273 return Span(reinterpret_cast<const EntityId *>(sp.data()), index_header->field_idx_size); 274 } 275 GetFieldIndex(EntityId id)276 Span<const EntityId> GetFieldIndex(EntityId id) const 277 { 278 auto *index_header = GetIndexHeader(id); 279 return GetFieldIndex(index_header); 280 } 281 GetProtoIndex(const IndexHeader * index_header)282 Span<const EntityId> GetProtoIndex(const IndexHeader *index_header) const 283 { 284 THROW_IF(index_header == nullptr, "index_header is null"); 285 auto *header = GetHeader(); 286 Span file(GetBase(), header->file_size); 287 ASSERT(index_header != nullptr); 288 auto proto_idx_size = index_header->proto_idx_size * EntityId::GetSize(); 289 THROW_IF(index_header->proto_idx_off > header->file_size || proto_idx_size > header->file_size || 290 index_header->proto_idx_off > header->file_size - proto_idx_size, "index_header is invalid"); 291 auto sp = file.SubSpan(index_header->proto_idx_off, index_header->proto_idx_size * EntityId::GetSize()); 292 return Span(reinterpret_cast<const EntityId *>(sp.data()), index_header->proto_idx_size); 293 } 294 GetProtoIndex(EntityId id)295 Span<const EntityId> GetProtoIndex(EntityId id) const 296 { 297 auto *index_header = GetIndexHeader(id); 298 return GetProtoIndex(index_header); 299 } 300 GetLineNumberProgramIndex()301 Span<const EntityId> GetLineNumberProgramIndex() const 302 { 303 const Header *header = GetHeader(); 304 Span file(GetBase(), header->file_size); 305 Span lnp_idx_data = file.SubSpan(header->lnp_idx_off, header->num_lnps * EntityId::GetSize()); 306 return Span(reinterpret_cast<const EntityId *>(lnp_idx_data.data()), header->num_lnps); 307 } 308 ResolveClassIndex(EntityId id,Index idx)309 EntityId ResolveClassIndex(EntityId id, Index idx) const 310 { 311 auto index = GetClassIndex(id); 312 if (UNLIKELY(idx >= index.Size())) { 313 return EntityId(); 314 } 315 return index[idx]; 316 } 317 ResolveMethodIndex(EntityId id,Index idx)318 EntityId ResolveMethodIndex(EntityId id, Index idx) const 319 { 320 auto index = GetMethodIndex(id); 321 if (UNLIKELY(idx >= index.Size())) { 322 return EntityId(); 323 } 324 return index[idx]; 325 } 326 ResolveOffsetByIndex(EntityId id,Index idx)327 EntityId ResolveOffsetByIndex(EntityId id, Index idx) const 328 { 329 auto index = GetMethodIndex(id); 330 if (UNLIKELY(idx >= index.Size())) { 331 return EntityId(); 332 } 333 return index[idx]; 334 } 335 ResolveFieldIndex(EntityId id,Index idx)336 EntityId ResolveFieldIndex(EntityId id, Index idx) const 337 { 338 auto index = GetFieldIndex(id); 339 if (UNLIKELY(idx >= index.Size())) { 340 return EntityId(); 341 } 342 return index[idx]; 343 } 344 ResolveProtoIndex(EntityId id,Index idx)345 EntityId ResolveProtoIndex(EntityId id, Index idx) const 346 { 347 auto index = GetProtoIndex(id); 348 if (UNLIKELY(idx >= index.Size())) { 349 return EntityId(); 350 } 351 return index[idx]; 352 } 353 ResolveLineNumberProgramIndex(Index32 idx)354 EntityId ResolveLineNumberProgramIndex(Index32 idx) const 355 { 356 auto index = GetLineNumberProgramIndex(); 357 if (UNLIKELY(idx >= index.Size())) { 358 return EntityId(); 359 } 360 return index[idx]; 361 } 362 GetFilename()363 const std::string &GetFilename() const 364 { 365 return FILENAME; 366 } 367 GetPandaCache()368 PandaCache *GetPandaCache() const 369 { 370 #ifdef ENABLE_FULL_FILE_FIELDS 371 return panda_cache_.get(); 372 #else 373 LOG(WARNING, PANDAFILE) << "Not Support GetPandaCache from ohos side."; 374 return nullptr; 375 #endif 376 } 377 GetFilenameHash()378 uint32_t GetFilenameHash() const 379 { 380 return FILENAME_HASH; 381 } 382 383 // note: intentionally returns uint64_t instead of the field type due to usage GetUniqId()384 uint64_t GetUniqId() const 385 { 386 return UNIQ_ID; 387 } 388 GetFullFileName()389 const std::string &GetFullFileName() const 390 { 391 #ifdef ENABLE_FULL_FILE_FIELDS 392 return FULL_FILENAME; 393 #else 394 LOG(FATAL, PANDAFILE) << "Not Support GetFullFileName from ohos side."; 395 return FILENAME; 396 #endif 397 } 398 GetFileBaseOffset()399 static constexpr uint32_t GetFileBaseOffset() 400 { 401 return MEMBER_OFFSET(File, base_); 402 } 403 GetClassHashTable()404 Span<const panda::panda_file::EntityPairHeader> GetClassHashTable() const 405 { 406 return class_hash_table_; 407 } 408 409 static uint32_t CalcFilenameHash(const std::string &filename); 410 411 static std::unique_ptr<const File> Open(std::string_view filename, OpenMode open_mode = READ_ONLY); 412 413 static std::unique_ptr<const File> OpenFromMemory(os::mem::ConstBytePtr &&ptr); 414 415 static std::unique_ptr<const File> OpenFromMemory(os::mem::ConstBytePtr &&ptr, std::string_view filename); 416 417 static std::unique_ptr<const File> OpenUncompressedArchive(int fd, const std::string_view &filename, size_t size, 418 uint32_t offset, OpenMode open_mode = READ_ONLY); 419 SetClassHashTable(panda::Span<const panda::panda_file::EntityPairHeader> class_hash_table)420 void SetClassHashTable(panda::Span<const panda::panda_file::EntityPairHeader> class_hash_table) const 421 { 422 class_hash_table_ = class_hash_table; 423 } 424 425 static constexpr const char *INVALID_FILE_OFFSET = "Invalid file offset"; 426 427 ~File(); 428 429 NO_COPY_SEMANTIC(File); 430 NO_MOVE_SEMANTIC(File); 431 432 private: 433 File(std::string filename, os::mem::ConstBytePtr &&base); 434 435 os::mem::ConstBytePtr base_; 436 const std::string FILENAME; 437 const uint32_t FILENAME_HASH; 438 #ifdef ENABLE_FULL_FILE_FIELDS 439 const std::string FULL_FILENAME; 440 std::unique_ptr<PandaCache> panda_cache_; 441 #endif 442 const uint32_t UNIQ_ID; 443 mutable panda::Span<const panda::panda_file::EntityPairHeader> class_hash_table_; 444 }; 445 446 static_assert(File::GetFileBaseOffset() == 0); 447 448 inline bool operator==(const File::StringData &string_data1, const File::StringData &string_data2) 449 { 450 if (string_data1.utf16_length != string_data2.utf16_length) { 451 return false; 452 } 453 454 return utf::IsEqual(string_data1.data, string_data2.data); 455 } 456 457 inline bool operator!=(const File::StringData &string_data1, const File::StringData &string_data2) 458 { 459 return !(string_data1 == string_data2); 460 } 461 462 inline bool operator<(const File::StringData &string_data1, const File::StringData &string_data2) 463 { 464 if (string_data1.utf16_length == string_data2.utf16_length) { 465 return utf::CompareMUtf8ToMUtf8(string_data1.data, string_data2.data) < 0; 466 } 467 468 return string_data1.utf16_length < string_data2.utf16_length; 469 } 470 471 /* 472 * OpenPandaFileOrZip from location which specicify the name. 473 */ 474 std::unique_ptr<const File> OpenPandaFileOrZip(std::string_view location, 475 panda_file::File::OpenMode open_mode = panda_file::File::READ_ONLY); 476 477 /* 478 * OpenPandaFileFromMemory from file buffer. 479 */ 480 std::unique_ptr<const File> OpenPandaFileFromMemory(const void *buffer, size_t size); 481 482 /* 483 * OpenPandaFileFromMemory from secure buffer. 484 */ 485 std::unique_ptr<const File> OpenPandaFileFromSecureMemory(uint8_t *buffer, size_t size); 486 487 /* 488 * OpenPandaFile from location which specicify the name. 489 */ 490 std::unique_ptr<const File> OpenPandaFile(std::string_view location, std::string_view archive_filename = "", 491 panda_file::File::OpenMode open_mode = panda_file::File::READ_ONLY); 492 493 /* 494 * Check ptr point valid panda file: magic 495 */ 496 bool CheckHeader(const os::mem::ConstBytePtr &ptr, const std::string_view &filename = ""); 497 498 // NOLINTNEXTLINE(readability-identifier-naming) 499 extern const char *ARCHIVE_FILENAME; 500 } // namespace panda::panda_file 501 502 namespace std { 503 template <> 504 struct hash<panda::panda_file::File::EntityId> { 505 std::size_t operator()(panda::panda_file::File::EntityId id) const 506 { 507 return std::hash<uint32_t> {}(id.GetOffset()); 508 } 509 }; 510 511 } // namespace std 512 513 #endif // LIBPANDAFILE_FILE_H 514