• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright 2014 the V8 project authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #include "src/strings/unicode-decoder.h"
6 
7 #include "src/strings/unicode-inl.h"
8 #include "src/utils/memcopy.h"
9 
10 namespace v8 {
11 namespace internal {
12 
Utf8Decoder(const base::Vector<const uint8_t> & chars)13 Utf8Decoder::Utf8Decoder(const base::Vector<const uint8_t>& chars)
14     : encoding_(Encoding::kAscii),
15       non_ascii_start_(NonAsciiStart(chars.begin(), chars.length())),
16       utf16_length_(non_ascii_start_) {
17   if (non_ascii_start_ == chars.length()) return;
18 
19   const uint8_t* cursor = chars.begin() + non_ascii_start_;
20   const uint8_t* end = chars.begin() + chars.length();
21 
22   bool is_one_byte = true;
23   uint32_t incomplete_char = 0;
24   unibrow::Utf8::State state = unibrow::Utf8::State::kAccept;
25 
26   while (cursor < end) {
27     unibrow::uchar t =
28         unibrow::Utf8::ValueOfIncremental(&cursor, &state, &incomplete_char);
29     if (t != unibrow::Utf8::kIncomplete) {
30       is_one_byte = is_one_byte && t <= unibrow::Latin1::kMaxChar;
31       utf16_length_++;
32       if (t > unibrow::Utf16::kMaxNonSurrogateCharCode) utf16_length_++;
33     }
34   }
35 
36   unibrow::uchar t = unibrow::Utf8::ValueOfIncrementalFinish(&state);
37   if (t != unibrow::Utf8::kBufferEmpty) {
38     is_one_byte = false;
39     utf16_length_++;
40   }
41 
42   encoding_ = is_one_byte ? Encoding::kLatin1 : Encoding::kUtf16;
43 }
44 
45 template <typename Char>
Decode(Char * out,const base::Vector<const uint8_t> & data)46 void Utf8Decoder::Decode(Char* out, const base::Vector<const uint8_t>& data) {
47   CopyChars(out, data.begin(), non_ascii_start_);
48 
49   out += non_ascii_start_;
50 
51   uint32_t incomplete_char = 0;
52   unibrow::Utf8::State state = unibrow::Utf8::State::kAccept;
53 
54   const uint8_t* cursor = data.begin() + non_ascii_start_;
55   const uint8_t* end = data.begin() + data.length();
56 
57   while (cursor < end) {
58     unibrow::uchar t =
59         unibrow::Utf8::ValueOfIncremental(&cursor, &state, &incomplete_char);
60     if (t != unibrow::Utf8::kIncomplete) {
61       if (sizeof(Char) == 1 || t <= unibrow::Utf16::kMaxNonSurrogateCharCode) {
62         *(out++) = static_cast<Char>(t);
63       } else {
64         *(out++) = unibrow::Utf16::LeadSurrogate(t);
65         *(out++) = unibrow::Utf16::TrailSurrogate(t);
66       }
67     }
68   }
69 
70   unibrow::uchar t = unibrow::Utf8::ValueOfIncrementalFinish(&state);
71   if (t != unibrow::Utf8::kBufferEmpty) *out = static_cast<Char>(t);
72 }
73 
74 template V8_EXPORT_PRIVATE void Utf8Decoder::Decode(
75     uint8_t* out, const base::Vector<const uint8_t>& data);
76 
77 template V8_EXPORT_PRIVATE void Utf8Decoder::Decode(
78     uint16_t* out, const base::Vector<const uint8_t>& data);
79 
80 }  // namespace internal
81 }  // namespace v8
82