1 // Copyright 2014 the V8 project authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 #ifndef V8_STRINGS_UNICODE_DECODER_H_ 6 #define V8_STRINGS_UNICODE_DECODER_H_ 7 8 #include "src/base/vector.h" 9 #include "src/strings/unicode.h" 10 11 namespace v8 { 12 namespace internal { 13 14 // The return value may point to the first aligned word containing the first 15 // non-one-byte character, rather than directly to the non-one-byte character. 16 // If the return value is >= the passed length, the entire string was 17 // one-byte. NonAsciiStart(const uint8_t * chars,int length)18inline int NonAsciiStart(const uint8_t* chars, int length) { 19 const uint8_t* start = chars; 20 const uint8_t* limit = chars + length; 21 22 if (static_cast<size_t>(length) >= kIntptrSize) { 23 // Check unaligned bytes. 24 while (!IsAligned(reinterpret_cast<intptr_t>(chars), kIntptrSize)) { 25 if (*chars > unibrow::Utf8::kMaxOneByteChar) { 26 return static_cast<int>(chars - start); 27 } 28 ++chars; 29 } 30 // Check aligned words. 31 DCHECK_EQ(unibrow::Utf8::kMaxOneByteChar, 0x7F); 32 const uintptr_t non_one_byte_mask = kUintptrAllBitsSet / 0xFF * 0x80; 33 while (chars + sizeof(uintptr_t) <= limit) { 34 if (*reinterpret_cast<const uintptr_t*>(chars) & non_one_byte_mask) { 35 return static_cast<int>(chars - start); 36 } 37 chars += sizeof(uintptr_t); 38 } 39 } 40 // Check remaining unaligned bytes. 41 while (chars < limit) { 42 if (*chars > unibrow::Utf8::kMaxOneByteChar) { 43 return static_cast<int>(chars - start); 44 } 45 ++chars; 46 } 47 48 return static_cast<int>(chars - start); 49 } 50 51 class V8_EXPORT_PRIVATE Utf8Decoder final { 52 public: 53 enum class Encoding : uint8_t { kAscii, kLatin1, kUtf16 }; 54 55 explicit Utf8Decoder(const base::Vector<const uint8_t>& chars); 56 is_ascii()57 bool is_ascii() const { return encoding_ == Encoding::kAscii; } is_one_byte()58 bool is_one_byte() const { return encoding_ <= Encoding::kLatin1; } utf16_length()59 int utf16_length() const { return utf16_length_; } non_ascii_start()60 int non_ascii_start() const { return non_ascii_start_; } 61 62 template <typename Char> 63 V8_EXPORT_PRIVATE void Decode(Char* out, 64 const base::Vector<const uint8_t>& data); 65 66 private: 67 Encoding encoding_; 68 int non_ascii_start_; 69 int utf16_length_; 70 }; 71 72 } // namespace internal 73 } // namespace v8 74 75 #endif // V8_STRINGS_UNICODE_DECODER_H_ 76