1 /** 2 * Copyright (c) 2021-2022 Huawei Device Co., Ltd. 3 * Licensed under the Apache License, Version 2.0 (the "License"); 4 * you may not use this file except in compliance with the License. 5 * You may obtain a copy of the License at 6 * 7 * http://www.apache.org/licenses/LICENSE-2.0 8 * 9 * Unless required by applicable law or agreed to in writing, software 10 * distributed under the License is distributed on an "AS IS" BASIS, 11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 * See the License for the specific language governing permissions and 13 * limitations under the License. 14 */ 15 16 #ifndef LIBPANDAFILE_FILE_H 17 #define LIBPANDAFILE_FILE_H 18 19 #include "os/mem.h" 20 #include "utils/span.h" 21 #include "utils/utf.h" 22 #include <cstdint> 23 24 #include <array> 25 #include <iomanip> 26 #include <iostream> 27 #include <memory> 28 #include <string> 29 #include <string_view> 30 31 namespace panda { 32 struct EntryFileStat; 33 } // namespace panda 34 35 namespace panda::panda_file { 36 37 class PandaCache; 38 39 /* 40 * EntityPairHeader Describes pair for hash value of class's descriptor and its entity id offset, 41 * used to quickly find class by its descriptor. 42 */ 43 struct EntityPairHeader { 44 uint32_t descriptorHash; 45 uint32_t entityIdOffset; 46 uint32_t nextPos; 47 }; 48 49 class File { 50 public: 51 using Index = uint16_t; 52 using Index32 = uint32_t; 53 54 static constexpr size_t MAGIC_SIZE = 8; 55 static constexpr size_t VERSION_SIZE = 4; 56 static const std::array<uint8_t, MAGIC_SIZE> MAGIC; 57 58 struct Header { 59 std::array<uint8_t, MAGIC_SIZE> magic; 60 uint32_t checksum; 61 std::array<uint8_t, VERSION_SIZE> version; 62 uint32_t fileSize; 63 uint32_t foreignOff; 64 uint32_t foreignSize; 65 uint32_t quickenedFlag; 66 uint32_t numClasses; 67 uint32_t classIdxOff; 68 uint32_t numLnps; 69 uint32_t lnpIdxOff; 70 uint32_t numLiteralarrays; 71 uint32_t literalarrayIdxOff; 72 uint32_t numIndexes; 73 uint32_t indexSectionOff; 74 }; 75 76 struct RegionHeader { 77 uint32_t start; 78 uint32_t end; 79 uint32_t classIdxSize; 80 uint32_t classIdxOff; 81 uint32_t methodIdxSize; 82 uint32_t methodIdxOff; 83 uint32_t fieldIdxSize; 84 uint32_t fieldIdxOff; 85 uint32_t protoIdxSize; 86 uint32_t protoIdxOff; 87 }; 88 89 struct StringData { StringDataStringData90 StringData(uint32_t len, const uint8_t *d) : utf16Length(len), isAscii(false), data(d) {} 91 StringData() = default; 92 uint32_t utf16Length; // NOLINT(misc-non-private-member-variables-in-classes) 93 bool isAscii; // NOLINT(misc-non-private-member-variables-in-classes) 94 const uint8_t *data; // NOLINT(misc-non-private-member-variables-in-classes) 95 }; 96 97 // NOLINTNEXTLINE(cppcoreguidelines-special-member-functions, hicpp-special-member-functions) 98 class EntityId { 99 public: EntityId(uint32_t offset)100 explicit constexpr EntityId(uint32_t offset) : offset_(offset) {} 101 102 EntityId() = default; 103 104 ~EntityId() = default; 105 IsValid()106 bool IsValid() const 107 { 108 return offset_ > sizeof(Header); 109 } 110 GetOffset()111 uint32_t GetOffset() const 112 { 113 return offset_; 114 } 115 GetSize()116 static constexpr size_t GetSize() 117 { 118 return sizeof(uint32_t); 119 } 120 121 friend bool operator<(const EntityId &l, const EntityId &r) 122 { 123 return l.offset_ < r.offset_; 124 } 125 126 friend bool operator==(const EntityId &l, const EntityId &r) 127 { 128 return l.offset_ == r.offset_; 129 } 130 131 friend std::ostream &operator<<(std::ostream &stream, const EntityId &id) 132 { 133 return stream << id.offset_; 134 } 135 136 private: 137 uint32_t offset_ {0}; 138 }; 139 140 enum OpenMode { READ_ONLY, READ_WRITE, WRITE_ONLY }; 141 142 StringData GetStringData(EntityId id) const; 143 EntityId GetLiteralArraysId() const; 144 145 EntityId GetClassId(const uint8_t *mutf8Name) const; 146 147 EntityId GetClassIdFromClassHashTable(const uint8_t *mutf8Name) const; 148 GetHeader()149 const Header *GetHeader() const 150 { 151 return reinterpret_cast<const Header *>(GetBase()); 152 } 153 GetBase()154 const uint8_t *GetBase() const 155 { 156 return reinterpret_cast<const uint8_t *>(base_.Get()); 157 } 158 GetPtr()159 const os::mem::ConstBytePtr &GetPtr() const 160 { 161 return base_; 162 } 163 IsExternal(EntityId id)164 bool IsExternal(EntityId id) const 165 { 166 const Header *header = GetHeader(); 167 uint32_t foreignBegin = header->foreignOff; 168 uint32_t foreignEnd = foreignBegin + header->foreignSize; 169 return id.GetOffset() >= foreignBegin && id.GetOffset() < foreignEnd; 170 } 171 GetIdFromPointer(const uint8_t * ptr)172 EntityId GetIdFromPointer(const uint8_t *ptr) const 173 { 174 return EntityId(ptr - GetBase()); 175 } 176 GetSpanFromId(EntityId id)177 Span<const uint8_t> GetSpanFromId(EntityId id) const 178 { 179 const Header *header = GetHeader(); 180 Span file(GetBase(), header->fileSize); 181 return file.Last(file.size() - id.GetOffset()); 182 } 183 GetClasses()184 Span<const uint32_t> GetClasses() const 185 { 186 const Header *header = GetHeader(); 187 Span file(GetBase(), header->fileSize); 188 Span classIdxData = file.SubSpan(header->classIdxOff, header->numClasses * sizeof(uint32_t)); 189 return Span(reinterpret_cast<const uint32_t *>(classIdxData.data()), header->numClasses); 190 } 191 GetLiteralArrays()192 Span<const uint32_t> GetLiteralArrays() const 193 { 194 const Header *header = GetHeader(); 195 Span file(GetBase(), header->fileSize); 196 Span litarrIdxData = file.SubSpan(header->literalarrayIdxOff, header->numLiteralarrays * sizeof(uint32_t)); 197 return Span(reinterpret_cast<const uint32_t *>(litarrIdxData.data()), header->numLiteralarrays); 198 } 199 GetRegionHeaders()200 Span<const RegionHeader> GetRegionHeaders() const 201 { 202 const Header *header = GetHeader(); 203 Span file(GetBase(), header->fileSize); 204 auto sp = file.SubSpan(header->indexSectionOff, header->numIndexes * sizeof(RegionHeader)); 205 return Span(reinterpret_cast<const RegionHeader *>(sp.data()), header->numIndexes); 206 } 207 GetRegionHeader(EntityId id)208 const RegionHeader *GetRegionHeader(EntityId id) const 209 { 210 auto headers = GetRegionHeaders(); 211 auto offset = id.GetOffset(); 212 for (const auto &header : headers) { 213 if (header.start <= offset && offset < header.end) { 214 return &header; 215 } 216 } 217 return nullptr; 218 } 219 GetClassIndex(const RegionHeader * regionHeader)220 Span<const EntityId> GetClassIndex(const RegionHeader *regionHeader) const 221 { 222 auto *header = GetHeader(); 223 Span file(GetBase(), header->fileSize); 224 ASSERT(regionHeader != nullptr); 225 auto sp = file.SubSpan(regionHeader->classIdxOff, regionHeader->classIdxSize * EntityId::GetSize()); 226 return Span(reinterpret_cast<const EntityId *>(sp.data()), regionHeader->classIdxSize); 227 } 228 GetClassIndex(EntityId id)229 Span<const EntityId> GetClassIndex(EntityId id) const 230 { 231 auto *regionHeader = GetRegionHeader(id); 232 ASSERT(regionHeader != nullptr); 233 return GetClassIndex(regionHeader); 234 } 235 GetMethodIndex(const RegionHeader * regionHeader)236 Span<const EntityId> GetMethodIndex(const RegionHeader *regionHeader) const 237 { 238 auto *header = GetHeader(); 239 Span file(GetBase(), header->fileSize); 240 ASSERT(regionHeader != nullptr); 241 auto sp = file.SubSpan(regionHeader->methodIdxOff, regionHeader->methodIdxSize * EntityId::GetSize()); 242 return Span(reinterpret_cast<const EntityId *>(sp.data()), regionHeader->methodIdxSize); 243 } 244 GetMethodIndex(EntityId id)245 Span<const EntityId> GetMethodIndex(EntityId id) const 246 { 247 auto *regionHeader = GetRegionHeader(id); 248 ASSERT(regionHeader != nullptr); 249 return GetMethodIndex(regionHeader); 250 } 251 GetFieldIndex(const RegionHeader * regionHeader)252 Span<const EntityId> GetFieldIndex(const RegionHeader *regionHeader) const 253 { 254 auto *header = GetHeader(); 255 Span file(GetBase(), header->fileSize); 256 ASSERT(regionHeader != nullptr); 257 auto sp = file.SubSpan(regionHeader->fieldIdxOff, regionHeader->fieldIdxSize * EntityId::GetSize()); 258 return Span(reinterpret_cast<const EntityId *>(sp.data()), regionHeader->fieldIdxSize); 259 } 260 GetFieldIndex(EntityId id)261 Span<const EntityId> GetFieldIndex(EntityId id) const 262 { 263 auto *regionHeader = GetRegionHeader(id); 264 ASSERT(regionHeader != nullptr); 265 return GetFieldIndex(regionHeader); 266 } 267 GetProtoIndex(const RegionHeader * regionHeader)268 Span<const EntityId> GetProtoIndex(const RegionHeader *regionHeader) const 269 { 270 auto *header = GetHeader(); 271 Span file(GetBase(), header->fileSize); 272 ASSERT(regionHeader != nullptr); 273 auto sp = file.SubSpan(regionHeader->protoIdxOff, regionHeader->protoIdxSize * EntityId::GetSize()); 274 return Span(reinterpret_cast<const EntityId *>(sp.data()), regionHeader->protoIdxSize); 275 } 276 GetProtoIndex(EntityId id)277 Span<const EntityId> GetProtoIndex(EntityId id) const 278 { 279 auto *regionHeader = GetRegionHeader(id); 280 ASSERT(regionHeader != nullptr); 281 return GetProtoIndex(regionHeader); 282 } 283 GetLineNumberProgramIndex()284 Span<const EntityId> GetLineNumberProgramIndex() const 285 { 286 const Header *header = GetHeader(); 287 Span file(GetBase(), header->fileSize); 288 Span lnpIdxData = file.SubSpan(header->lnpIdxOff, header->numLnps * EntityId::GetSize()); 289 return Span(reinterpret_cast<const EntityId *>(lnpIdxData.data()), header->numLnps); 290 } 291 ResolveClassIndex(EntityId id,Index idx)292 EntityId ResolveClassIndex(EntityId id, Index idx) const 293 { 294 auto index = GetClassIndex(id); 295 return index[idx]; 296 } 297 ResolveMethodIndex(EntityId id,Index idx)298 EntityId ResolveMethodIndex(EntityId id, Index idx) const 299 { 300 auto index = GetMethodIndex(id); 301 return index[idx]; 302 } 303 ResolveFieldIndex(EntityId id,Index idx)304 EntityId ResolveFieldIndex(EntityId id, Index idx) const 305 { 306 auto index = GetFieldIndex(id); 307 return index[idx]; 308 } 309 ResolveProtoIndex(EntityId id,Index idx)310 EntityId ResolveProtoIndex(EntityId id, Index idx) const 311 { 312 auto index = GetProtoIndex(id); 313 return index[idx]; 314 } 315 ResolveLineNumberProgramIndex(Index32 idx)316 EntityId ResolveLineNumberProgramIndex(Index32 idx) const 317 { 318 auto index = GetLineNumberProgramIndex(); 319 return index[idx]; 320 } 321 GetFilename()322 const std::string &GetFilename() const 323 { 324 return filename_; 325 } 326 GetPandaCache()327 PandaCache *GetPandaCache() const 328 { 329 return pandaCache_.get(); 330 } 331 GetFilenameHash()332 uint32_t GetFilenameHash() const 333 { 334 return filenameHash_; 335 } 336 337 // note: intentionally returns uint64_t instead of the field type due to usage GetUniqId()338 uint64_t GetUniqId() const 339 { 340 return uniqId_; 341 } 342 GetFullFileName()343 const std::string &GetFullFileName() const 344 { 345 return fullFilename_; 346 } 347 GetFileBaseOffset()348 static constexpr uint32_t GetFileBaseOffset() 349 { 350 return MEMBER_OFFSET(File, base_); 351 } 352 GetClassHashTable()353 Span<const panda::panda_file::EntityPairHeader> GetClassHashTable() const 354 { 355 return classHashTable_; 356 } 357 GetPaddedChecksum()358 std::string GetPaddedChecksum() const 359 { 360 std::stringstream paddedChecksum; 361 // Length of hexed maximum unit32_t value of checksum (0xFFFFFFFF) is equal to 8 362 constexpr size_t CHECKSUM_LENGTH = 8; 363 paddedChecksum << std::setfill('0') << std::setw(CHECKSUM_LENGTH) << std::hex << GetHeader()->checksum; 364 return paddedChecksum.str(); 365 } 366 367 static uint32_t CalcFilenameHash(const std::string &filename); 368 369 static std::unique_ptr<const File> Open(std::string_view filename, OpenMode openMode = READ_ONLY); 370 371 static std::unique_ptr<const File> OpenFromMemory(os::mem::ConstBytePtr &&ptr); 372 373 static std::unique_ptr<const File> OpenFromMemory(os::mem::ConstBytePtr &&ptr, std::string_view filename); 374 375 static std::unique_ptr<const File> OpenUncompressedArchive(int fd, const std::string_view &filename, size_t size, 376 uint32_t offset, OpenMode openMode = READ_ONLY); 377 SetClassHashTable(panda::Span<const panda::panda_file::EntityPairHeader> classHashTable)378 void SetClassHashTable(panda::Span<const panda::panda_file::EntityPairHeader> classHashTable) const 379 { 380 classHashTable_ = classHashTable; 381 } 382 383 ~File(); 384 385 NO_COPY_SEMANTIC(File); 386 NO_MOVE_SEMANTIC(File); 387 388 private: 389 File(std::string filename, os::mem::ConstBytePtr &&base); 390 391 os::mem::ConstBytePtr base_; 392 const std::string filename_; 393 const uint32_t filenameHash_; 394 const std::string fullFilename_; 395 std::unique_ptr<PandaCache> pandaCache_; 396 const uint32_t uniqId_; 397 mutable panda::Span<const panda::panda_file::EntityPairHeader> classHashTable_; 398 }; 399 400 static_assert(File::GetFileBaseOffset() == 0); 401 402 inline bool operator==(const File::StringData &stringData1, const File::StringData &stringData2) 403 { 404 if (stringData1.utf16Length != stringData2.utf16Length) { 405 return false; 406 } 407 408 return utf::IsEqual(stringData1.data, stringData2.data); 409 } 410 411 inline bool operator!=(const File::StringData &stringData1, const File::StringData &stringData2) 412 { 413 return !(stringData1 == stringData2); 414 } 415 416 inline bool operator<(const File::StringData &stringData1, const File::StringData &stringData2) 417 { 418 if (stringData1.utf16Length == stringData2.utf16Length) { 419 return utf::CompareMUtf8ToMUtf8(stringData1.data, stringData2.data) < 0; 420 } 421 422 return stringData1.utf16Length < stringData2.utf16Length; 423 } 424 425 /* 426 * OpenPandaFileOrZip from location which specicify the name. 427 */ 428 std::unique_ptr<const File> OpenPandaFileOrZip(std::string_view location, 429 panda_file::File::OpenMode openMode = panda_file::File::READ_ONLY); 430 431 /* 432 * OpenPandaFileFromMemory from file buffer. 433 */ 434 std::unique_ptr<const File> OpenPandaFileFromMemory(const void *buffer, size_t size); 435 436 /* 437 * OpenPandaFile from location which specicify the name. 438 */ 439 std::unique_ptr<const File> OpenPandaFile(std::string_view location, std::string_view archiveFilename = "", 440 panda_file::File::OpenMode openMode = panda_file::File::READ_ONLY); 441 442 /* 443 * Check ptr point valid panda file: magic 444 */ 445 bool CheckHeader(const os::mem::ConstBytePtr &ptr, const std::string_view &filename = ""); 446 447 // NOLINTNEXTLINE(readability-identifier-naming) 448 extern const char *g_archiveFilenames; 449 } // namespace panda::panda_file 450 451 namespace std { 452 template <> 453 struct hash<panda::panda_file::File::EntityId> { 454 std::size_t operator()(panda::panda_file::File::EntityId id) const 455 { 456 return std::hash<uint32_t> {}(id.GetOffset()); 457 } 458 }; 459 } // namespace std 460 461 #endif 462