• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright 2011 the V8 project authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #include "src/parsing/duplicate-finder.h"
6 
7 #include "src/conversions.h"
8 #include "src/unicode-cache.h"
9 
10 namespace v8 {
11 namespace internal {
12 
AddOneByteSymbol(Vector<const uint8_t> key,int value)13 int DuplicateFinder::AddOneByteSymbol(Vector<const uint8_t> key, int value) {
14   return AddSymbol(key, true, value);
15 }
16 
AddTwoByteSymbol(Vector<const uint16_t> key,int value)17 int DuplicateFinder::AddTwoByteSymbol(Vector<const uint16_t> key, int value) {
18   return AddSymbol(Vector<const uint8_t>::cast(key), false, value);
19 }
20 
AddSymbol(Vector<const uint8_t> key,bool is_one_byte,int value)21 int DuplicateFinder::AddSymbol(Vector<const uint8_t> key, bool is_one_byte,
22                                int value) {
23   uint32_t hash = Hash(key, is_one_byte);
24   byte* encoding = BackupKey(key, is_one_byte);
25   base::HashMap::Entry* entry = map_.LookupOrInsert(encoding, hash);
26   int old_value = static_cast<int>(reinterpret_cast<intptr_t>(entry->value));
27   entry->value =
28       reinterpret_cast<void*>(static_cast<intptr_t>(value | old_value));
29   return old_value;
30 }
31 
AddNumber(Vector<const uint8_t> key,int value)32 int DuplicateFinder::AddNumber(Vector<const uint8_t> key, int value) {
33   DCHECK(key.length() > 0);
34   // Quick check for already being in canonical form.
35   if (IsNumberCanonical(key)) {
36     return AddOneByteSymbol(key, value);
37   }
38 
39   int flags = ALLOW_HEX | ALLOW_OCTAL | ALLOW_IMPLICIT_OCTAL | ALLOW_BINARY;
40   double double_value = StringToDouble(unicode_constants_, key, flags, 0.0);
41   int length;
42   const char* string;
43   if (!std::isfinite(double_value)) {
44     string = "Infinity";
45     length = 8;  // strlen("Infinity");
46   } else {
47     string = DoubleToCString(double_value,
48                              Vector<char>(number_buffer_, kBufferSize));
49     length = StrLength(string);
50   }
51   return AddSymbol(
52       Vector<const byte>(reinterpret_cast<const byte*>(string), length), true,
53       value);
54 }
55 
IsNumberCanonical(Vector<const uint8_t> number)56 bool DuplicateFinder::IsNumberCanonical(Vector<const uint8_t> number) {
57   // Test for a safe approximation of number literals that are already
58   // in canonical form: max 15 digits, no leading zeroes, except an
59   // integer part that is a single zero, and no trailing zeros below
60   // the decimal point.
61   int pos = 0;
62   int length = number.length();
63   if (number.length() > 15) return false;
64   if (number[pos] == '0') {
65     pos++;
66   } else {
67     while (pos < length &&
68            static_cast<unsigned>(number[pos] - '0') <= ('9' - '0'))
69       pos++;
70   }
71   if (length == pos) return true;
72   if (number[pos] != '.') return false;
73   pos++;
74   bool invalid_last_digit = true;
75   while (pos < length) {
76     uint8_t digit = number[pos] - '0';
77     if (digit > '9' - '0') return false;
78     invalid_last_digit = (digit == 0);
79     pos++;
80   }
81   return !invalid_last_digit;
82 }
83 
Hash(Vector<const uint8_t> key,bool is_one_byte)84 uint32_t DuplicateFinder::Hash(Vector<const uint8_t> key, bool is_one_byte) {
85   // Primitive hash function, almost identical to the one used
86   // for strings (except that it's seeded by the length and representation).
87   int length = key.length();
88   uint32_t hash = (length << 1) | (is_one_byte ? 1 : 0);
89   for (int i = 0; i < length; i++) {
90     uint32_t c = key[i];
91     hash = (hash + c) * 1025;
92     hash ^= (hash >> 6);
93   }
94   return hash;
95 }
96 
Match(void * first,void * second)97 bool DuplicateFinder::Match(void* first, void* second) {
98   // Decode lengths.
99   // Length + representation is encoded as base 128, most significant heptet
100   // first, with a 8th bit being non-zero while there are more heptets.
101   // The value encodes the number of bytes following, and whether the original
102   // was Latin1.
103   byte* s1 = reinterpret_cast<byte*>(first);
104   byte* s2 = reinterpret_cast<byte*>(second);
105   uint32_t length_one_byte_field = 0;
106   byte c1;
107   do {
108     c1 = *s1;
109     if (c1 != *s2) return false;
110     length_one_byte_field = (length_one_byte_field << 7) | (c1 & 0x7f);
111     s1++;
112     s2++;
113   } while ((c1 & 0x80) != 0);
114   int length = static_cast<int>(length_one_byte_field >> 1);
115   return memcmp(s1, s2, length) == 0;
116 }
117 
BackupKey(Vector<const uint8_t> bytes,bool is_one_byte)118 byte* DuplicateFinder::BackupKey(Vector<const uint8_t> bytes,
119                                  bool is_one_byte) {
120   uint32_t one_byte_length = (bytes.length() << 1) | (is_one_byte ? 1 : 0);
121   backing_store_.StartSequence();
122   // Emit one_byte_length as base-128 encoded number, with the 7th bit set
123   // on the byte of every heptet except the last, least significant, one.
124   if (one_byte_length >= (1 << 7)) {
125     if (one_byte_length >= (1 << 14)) {
126       if (one_byte_length >= (1 << 21)) {
127         if (one_byte_length >= (1 << 28)) {
128           backing_store_.Add(
129               static_cast<uint8_t>((one_byte_length >> 28) | 0x80));
130         }
131         backing_store_.Add(
132             static_cast<uint8_t>((one_byte_length >> 21) | 0x80u));
133       }
134       backing_store_.Add(static_cast<uint8_t>((one_byte_length >> 14) | 0x80u));
135     }
136     backing_store_.Add(static_cast<uint8_t>((one_byte_length >> 7) | 0x80u));
137   }
138   backing_store_.Add(static_cast<uint8_t>(one_byte_length & 0x7f));
139 
140   backing_store_.AddBlock(bytes);
141   return backing_store_.EndSequence().start();
142 }
143 
144 }  // namespace internal
145 }  // namespace v8
146