1 // Protocol Buffers - Google's data interchange format
2 // Copyright 2023 Google Inc. All rights reserved.
3 //
4 // Use of this source code is governed by a BSD-style
5 // license that can be found in the LICENSE file or at
6 // https://developers.google.com/open-source/licenses/bsd
7
8 #ifndef GOOGLE_PROTOBUF_VARINT_SHUFFLE_H__
9 #define GOOGLE_PROTOBUF_VARINT_SHUFFLE_H__
10
11 #include <cassert>
12 #include <cstddef>
13 #include <cstdint>
14 #include <type_traits>
15 #include <utility>
16
17 // Must be included last.
18 #include "google/protobuf/port_def.inc"
19
20 namespace google {
21 namespace protobuf {
22 namespace internal {
23
24 // Shifts "byte" left by n * 7 bits, filling vacated bits from `ones`.
25 template <int n>
VarintShlByte(int8_t byte,int64_t ones)26 inline PROTOBUF_ALWAYS_INLINE int64_t VarintShlByte(int8_t byte, int64_t ones) {
27 return static_cast<int64_t>((static_cast<uint64_t>(byte) << n * 7) |
28 (static_cast<uint64_t>(ones) >> (64 - n * 7)));
29 }
30
31 // Shifts "byte" left by n * 7 bits, filling vacated bits from `ones` and
32 // bitwise ANDs the resulting value into the input/output `res` parameter.
33 // Returns true if the result was not negative.
34 template <int n>
VarintShlAnd(int8_t byte,int64_t ones,int64_t & res)35 inline PROTOBUF_ALWAYS_INLINE bool VarintShlAnd(int8_t byte, int64_t ones,
36 int64_t& res) {
37 res &= VarintShlByte<n>(byte, ones);
38 return res >= 0;
39 }
40
41 // Shifts `byte` left by n * 7 bits, filling vacated bits with ones, and
42 // puts the new value in the output only parameter `res`.
43 // Returns true if the result was not negative.
44 template <int n>
VarintShl(int8_t byte,int64_t ones,int64_t & res)45 inline PROTOBUF_ALWAYS_INLINE bool VarintShl(int8_t byte, int64_t ones,
46 int64_t& res) {
47 res = VarintShlByte<n>(byte, ones);
48 return res >= 0;
49 }
50
51 template <typename VarintType, int limit = 10>
ShiftMixParseVarint(const char * p,int64_t & res1)52 inline PROTOBUF_ALWAYS_INLINE const char* ShiftMixParseVarint(const char* p,
53 int64_t& res1) {
54 using Signed = std::make_signed_t<VarintType>;
55 constexpr bool kIs64BitVarint = std::is_same<Signed, int64_t>::value;
56 constexpr bool kIs32BitVarint = std::is_same<Signed, int32_t>::value;
57 static_assert(kIs64BitVarint || kIs32BitVarint, "");
58
59 // The algorithm relies on sign extension for each byte to set all high bits
60 // when the varint continues. It also relies on asserting all of the lower
61 // bits for each successive byte read. This allows the result to be aggregated
62 // using a bitwise AND. For example:
63 //
64 // 8 1 64 57 ... 24 17 16 9 8 1
65 // ptr[0] = 1aaa aaaa ; res1 = 1111 1111 ... 1111 1111 1111 1111 1aaa aaaa
66 // ptr[1] = 1bbb bbbb ; res2 = 1111 1111 ... 1111 1111 11bb bbbb b111 1111
67 // ptr[2] = 0ccc cccc ; res3 = 0000 0000 ... 000c cccc cc11 1111 1111 1111
68 // ---------------------------------------------
69 // res1 & res2 & res3 = 0000 0000 ... 000c cccc ccbb bbbb baaa aaaa
70 //
71 // On x86-64, a shld from a single register filled with enough 1s in the high
72 // bits can accomplish all this in one instruction. It so happens that res1
73 // has 57 high bits of ones, which is enough for the largest shift done.
74 //
75 // Just as importantly, by keeping results in res1, res2, and res3, we take
76 // advantage of the superscalar abilities of the CPU.
77 const auto next = [&p] { return static_cast<const int8_t>(*p++); };
78 const auto last = [&p] { return static_cast<const int8_t>(p[-1]); };
79
80 int64_t res2, res3; // accumulated result chunks
81 res1 = next();
82 if (PROTOBUF_PREDICT_TRUE(res1 >= 0)) return p;
83 if (limit <= 1) goto limit0;
84
85 // Densify all ops with explicit FALSE predictions from here on, except that
86 // we predict length = 5 as a common length for fields like timestamp.
87 if (PROTOBUF_PREDICT_FALSE(VarintShl<1>(next(), res1, res2))) goto done1;
88 if (limit <= 2) goto limit1;
89 if (PROTOBUF_PREDICT_FALSE(VarintShl<2>(next(), res1, res3))) goto done2;
90 if (limit <= 3) goto limit2;
91 if (PROTOBUF_PREDICT_FALSE(VarintShlAnd<3>(next(), res1, res2))) goto done2;
92 if (limit <= 4) goto limit2;
93 if (PROTOBUF_PREDICT_TRUE(VarintShlAnd<4>(next(), res1, res3))) goto done2;
94 if (limit <= 5) goto limit2;
95
96 if (kIs64BitVarint) {
97 if (PROTOBUF_PREDICT_FALSE(VarintShlAnd<5>(next(), res1, res2))) goto done2;
98 if (limit <= 6) goto limit2;
99 if (PROTOBUF_PREDICT_FALSE(VarintShlAnd<6>(next(), res1, res3))) goto done2;
100 if (limit <= 7) goto limit2;
101 if (PROTOBUF_PREDICT_FALSE(VarintShlAnd<7>(next(), res1, res2))) goto done2;
102 if (limit <= 8) goto limit2;
103 if (PROTOBUF_PREDICT_FALSE(VarintShlAnd<8>(next(), res1, res3))) goto done2;
104 if (limit <= 9) goto limit2;
105 } else {
106 // An overlong int32 is expected to span the full 10 bytes
107 if (PROTOBUF_PREDICT_FALSE(!(next() & 0x80))) goto done2;
108 if (limit <= 6) goto limit2;
109 if (PROTOBUF_PREDICT_FALSE(!(next() & 0x80))) goto done2;
110 if (limit <= 7) goto limit2;
111 if (PROTOBUF_PREDICT_FALSE(!(next() & 0x80))) goto done2;
112 if (limit <= 8) goto limit2;
113 if (PROTOBUF_PREDICT_FALSE(!(next() & 0x80))) goto done2;
114 if (limit <= 9) goto limit2;
115 }
116
117 // For valid 64bit varints, the 10th byte/ptr[9] should be exactly 1. In this
118 // case, the continuation bit of ptr[8] already set the top bit of res3
119 // correctly, so all we have to do is check that the expected case is true.
120 if (PROTOBUF_PREDICT_TRUE(next() == 1)) goto done2;
121
122 if (PROTOBUF_PREDICT_FALSE(last() & 0x80)) {
123 // If the continue bit is set, it is an unterminated varint.
124 return nullptr;
125 }
126
127 // A zero value of the first bit of the 10th byte represents an
128 // over-serialized varint. This case should not happen, but if does (say, due
129 // to a nonconforming serializer), deassert the continuation bit that came
130 // from ptr[8].
131 if (kIs64BitVarint && (last() & 1) == 0) {
132 static constexpr int bits = 64 - 1;
133 #if defined(__GCC_ASM_FLAG_OUTPUTS__) && defined(__x86_64__)
134 // Use a small instruction since this is an uncommon code path.
135 asm("btc %[bits], %[res3]" : [res3] "+r"(res3) : [bits] "i"(bits));
136 #else
137 res3 ^= int64_t{1} << bits;
138 #endif
139 }
140
141 done2:
142 res2 &= res3;
143 done1:
144 res1 &= res2;
145 PROTOBUF_ASSUME(p != nullptr);
146 return p;
147 limit2:
148 res2 &= res3;
149 limit1:
150 res1 &= res2;
151 limit0:
152 PROTOBUF_ASSUME(p != nullptr);
153 PROTOBUF_ASSUME(res1 < 0);
154 return p;
155 }
156
157 } // namespace internal
158 } // namespace protobuf
159 } // namespace google
160
161 #include "google/protobuf/port_undef.inc"
162
163 #endif // GOOGLE_PROTOBUF_VARINT_SHUFFLE_H__
164