1 // Protocol Buffers - Google's data interchange format
2 // Copyright 2023 Google Inc. All rights reserved.
3 //
4 // Use of this source code is governed by a BSD-style
5 // license that can be found in the LICENSE file or at
6 // https://developers.google.com/open-source/licenses/bsd
7
8 #include "google/protobuf/generated_enum_util.h"
9
10 #include <algorithm>
11 #include <array>
12 #include <cstddef>
13 #include <cstdint>
14 #include <limits>
15 #include <ostream>
16 #include <string>
17 #include <vector>
18
19 #include <gmock/gmock.h>
20 #include <gtest/gtest.h>
21 #include "absl/container/btree_set.h"
22 #include "absl/strings/str_format.h"
23 #include "absl/types/span.h"
24 #include "google/protobuf/descriptor.h"
25
26
27 // Must be included last.
28 #include "google/protobuf/port_def.inc"
29
30 using testing::_;
31 using testing::ElementsAre;
32 using testing::Gt;
33 using testing::IsEmpty;
34 using testing::SizeIs;
35
36 namespace google {
37 namespace protobuf {
38 namespace internal {
39 namespace {
40
TEST(GenerateEnumDataTest,DebugChecks)41 TEST(GenerateEnumDataTest, DebugChecks) {
42 #if GTEST_HAS_DEATH_TEST
43 // Not unique
44 EXPECT_DEBUG_DEATH(GenerateEnumData({1, 1}), "sorted_and_unique");
45 // Not sorted
46 EXPECT_DEBUG_DEATH(GenerateEnumData({2, 1}), "sorted_and_unique");
47 #endif
48 }
49
Make32(uint16_t a,uint16_t b)50 uint32_t Make32(uint16_t a, uint16_t b) { return a | (b << 16); }
Unmake32(uint32_t v)51 std::array<uint16_t, 2> Unmake32(uint32_t v) {
52 return {static_cast<uint16_t>(v), static_cast<uint16_t>(v >> 16)};
53 }
54
55 struct Header {
56 int16_t sequence_start;
57 uint16_t sequence_length;
58 uint16_t bitmap_length;
59 uint16_t ordered_length;
60
ToStringgoogle::protobuf::internal::__anonf1fab69a0111::Header61 std::string ToString() const {
62 return absl::StrFormat("(%d,%d,%d,%d)", sequence_start, sequence_length,
63 bitmap_length, ordered_length);
64 }
operator <<(std::ostream & os,Header header)65 friend std::ostream& operator<<(std::ostream& os, Header header) {
66 return os << header.ToString();
67 }
68 };
69
70 MATCHER_P4(HeaderHas, sequence_start, sequence_length, bitmap_length,
71 ordered_length, "") {
72 return testing::ExplainMatchResult(sequence_start, arg.sequence_start,
73 result_listener) &&
74 testing::ExplainMatchResult(sequence_length, arg.sequence_length,
75 result_listener) &&
76 testing::ExplainMatchResult(bitmap_length, arg.bitmap_length,
77 result_listener) &&
78 testing::ExplainMatchResult(ordered_length, arg.ordered_length,
79 result_listener);
80 }
81
ExtractHeader(absl::Span<const uint32_t> data)82 Header ExtractHeader(absl::Span<const uint32_t> data) {
83 return {
84 static_cast<int16_t>(Unmake32(data[0])[0]),
85 Unmake32(data[0])[1],
86 Unmake32(data[1])[0],
87 Unmake32(data[1])[1],
88 };
89 }
90
TEST(GenerateEnumDataTest,BitmapSpaceOptimizationWorks)91 TEST(GenerateEnumDataTest, BitmapSpaceOptimizationWorks) {
92 std::vector<int32_t> values = {0};
93
94 auto encoded = GenerateEnumData(values);
95 EXPECT_THAT(ExtractHeader(encoded), HeaderHas(0, 1, 0, 0));
96 EXPECT_THAT(encoded, SizeIs(2));
97
98 // Adding one large value puts it on the fallback.
99 values.push_back(100);
100 encoded = GenerateEnumData(values);
101 EXPECT_THAT(ExtractHeader(encoded), HeaderHas(0, 1, 0, 1));
102 EXPECT_THAT(encoded, SizeIs(3));
103
104 // Adding a second one still prefers the fallback.
105 values.push_back(101);
106 encoded = GenerateEnumData(values);
107 EXPECT_THAT(ExtractHeader(encoded), HeaderHas(0, 1, 0, 2));
108 EXPECT_THAT(encoded, SizeIs(4));
109
110 // Adding two more now makes bitmap more efficient, so they are collapsed
111 // to it. Because we can fit the bitmap in 128 bits, which is the same as the
112 // ints.
113 values.push_back(102);
114 values.push_back(103);
115 encoded = GenerateEnumData(values);
116 EXPECT_THAT(ExtractHeader(encoded), HeaderHas(0, 1, 128, 0));
117 EXPECT_THAT(encoded, SizeIs(6));
118
119 // Add one value that falls into the existing bitmap, nothing changes.
120 values.push_back(104);
121 encoded = GenerateEnumData(values);
122 EXPECT_THAT(ExtractHeader(encoded), HeaderHas(0, 1, 128, 0));
123 EXPECT_THAT(encoded, SizeIs(6));
124
125 // Add one value that is in the next 32 bits. It should grow the bitmap.
126 values.push_back(130);
127 encoded = GenerateEnumData(values);
128 EXPECT_THAT(ExtractHeader(encoded), HeaderHas(0, 1, 160, 0));
129 EXPECT_THAT(encoded, SizeIs(7));
130
131 // Add one value far away, it should go into fallback.
132 values.push_back(200);
133 encoded = GenerateEnumData(values);
134 EXPECT_THAT(ExtractHeader(encoded), HeaderHas(0, 1, 160, 1));
135 EXPECT_THAT(encoded, SizeIs(8));
136
137 // Another in the next 32-bit block will still make them fallback.
138 values.push_back(230);
139 encoded = GenerateEnumData(values);
140 EXPECT_THAT(ExtractHeader(encoded), HeaderHas(0, 1, 160, 2));
141 EXPECT_THAT(encoded, SizeIs(9));
142
143 // One more in that same block should collapse them to bitmap.
144 values.push_back(231);
145 encoded = GenerateEnumData(values);
146 EXPECT_THAT(ExtractHeader(encoded), HeaderHas(0, 1, 256, 0));
147 EXPECT_THAT(encoded, SizeIs(10));
148 }
149
GatherValidValues(absl::Span<const uint32_t> data,int32_t min,int32_t max,absl::btree_set<int32_t> & out)150 void GatherValidValues(absl::Span<const uint32_t> data, int32_t min,
151 int32_t max, absl::btree_set<int32_t>& out) {
152 if (min >= max) return;
153 for (int32_t i = min;; ++i) {
154 if (ValidateEnum(i, data.begin())) out.insert(i);
155 // We check the top limit before ++i to avoid overflows
156 if (i == max) break;
157 }
158 }
159
GetValidValues(absl::Span<const uint32_t> data,int32_t min,int32_t max)160 std::vector<int32_t> GetValidValues(absl::Span<const uint32_t> data,
161 int32_t min, int32_t max) {
162 // Btree to keep them sorted. Makes testing easier.
163 absl::btree_set<int32_t> s;
164 GatherValidValues(data, min, max, s);
165 return std::vector<int32_t>(s.begin(), s.end());
166 }
167
TEST(ValidateEnumTest,SequentialRangeTest)168 TEST(ValidateEnumTest, SequentialRangeTest) {
169 EXPECT_THAT(GetValidValues({0, 0}, -100, 100), ElementsAre());
170 EXPECT_THAT(GetValidValues(
171 {// sequence start=3, length=3
172 Make32(5, 3),
173 // no bitmap, no fallback
174 Make32(0, 0)},
175 -100, 100),
176 ElementsAre(5, 6, 7));
177 EXPECT_THAT(GetValidValues(
178 {// sequence start=-2, length=10
179 Make32(-2, 10),
180 // no bitmap, no fallback
181 Make32(0, 0)},
182 -100, 100),
183 ElementsAre(-2, -1, 0, 1, 2, 3, 4, 5, 6, 7));
184 }
185
TEST(ValidateEnumTest,BitmapRangeTest)186 TEST(ValidateEnumTest, BitmapRangeTest) {
187 EXPECT_THAT(GetValidValues(
188 {// no sequence
189 Make32(0, 0),
190 // bitmap of 32 bits, no fallback
191 Make32(32, 0),
192 // bitmap
193 0b10011010101},
194 -100, 100),
195 ElementsAre(0, 2, 4, 6, 7, 10));
196 EXPECT_THAT(GetValidValues(
197 {// no sequence
198 Make32(0, 0),
199 // bitmap of 64 bits, no fallback
200 Make32(64, 0),
201 // bitmap
202 (1 << 4) | (1 << 21), 1 << 6},
203 -100, 100),
204 ElementsAre(4, 21, 32 + 6));
205 }
206
TEST(ValidateEnumTest,GenerateEnumDataSequential)207 TEST(ValidateEnumTest, GenerateEnumDataSequential) {
208 EXPECT_THAT(GenerateEnumData({0, 1, 2, 3}), ElementsAre(
209 // sequence start=0, length=4
210 Make32(0, 4),
211 // no bitmap, no fallback.
212 Make32(0, 0)));
213 EXPECT_THAT(GenerateEnumData({-2, -1, 0, 1, 2, 3}),
214 ElementsAre(
215 // sequence start=-2, length=6
216 Make32(-2, 6),
217 // no bitmap, no fallback.
218 Make32(0, 0)));
219 }
220
TestRoundTrip(absl::Span<const int32_t> values,int line)221 void TestRoundTrip(absl::Span<const int32_t> values, int line) {
222 auto encoded = GenerateEnumData(values);
223
224 absl::btree_set<int32_t> s;
225
226 // We test that all elements in `values` exist in the encoded data, and also
227 // test a range of other values to verify that they do not exist in the
228 // encoded data.
229
230 // We keep track of the max seen to avoid testing the same values many times.
231 int64_t max_seen = std::numeric_limits<int64_t>::min();
232 const auto gather_valid_values_around = [&](int32_t v) {
233 int32_t min = std::max({
234 static_cast<int64_t>(v) - 100,
235 static_cast<int64_t>(std::numeric_limits<int32_t>::min()),
236 max_seen,
237 });
238 int32_t max =
239 std::min(static_cast<int64_t>(v) + 100,
240 static_cast<int64_t>(std::numeric_limits<int32_t>::max()));
241 max_seen = std::max(max_seen, int64_t{max});
242 GatherValidValues(encoded, min, max, s);
243 };
244
245 // We look at a few values around the expected ones.
246 // We could in theory test the whole int32_t domain, but that takes too long
247 // to run.
248 for (int32_t v : values) {
249 gather_valid_values_around(v);
250 }
251 // Also gather some around 0, just to add more coverage, specially when
252 // `values` is empty.
253 gather_valid_values_around(0);
254
255 // Skip the checks below if we are correct because they are expensive.
256 if (std::equal(s.begin(), s.end(), values.begin(), values.end())) return;
257
258 std::vector<int32_t> false_negatives;
259 for (int32_t v : values) {
260 if (!ValidateEnum(v, encoded.data())) false_negatives.push_back(v);
261 s.erase(v);
262 }
263 const auto& false_positives = s;
264 const auto print_data = [&] {
265 auto header = ExtractHeader(encoded);
266 return absl::StrFormat("line=%d header=%s", line, header.ToString());
267 };
268 EXPECT_THAT(false_negatives, IsEmpty())
269 << "Missing values from the input. " << print_data()
270 << "\nEncoded: " << testing::PrintToString(encoded);
271 EXPECT_THAT(false_positives, IsEmpty())
272 << "Found values not in input. " << print_data()
273 << "\nEncoded: " << testing::PrintToString(encoded);
274 }
275
TEST(ValidateEnumTest,GenerateEnumDataSequentialWithOverflow)276 TEST(ValidateEnumTest, GenerateEnumDataSequentialWithOverflow) {
277 std::vector<int32_t> values;
278 for (int32_t i = -33000; i < 33000; ++i) {
279 values.push_back(i);
280 }
281 const auto data = GenerateEnumData(values);
282 EXPECT_THAT(ExtractHeader(data),
283 HeaderHas(
284 // The sequence starts at the minimum possible value,
285 std::numeric_limits<int16_t>::min(),
286 // and it is as long as possible.
287 0xFFFF,
288 // we have some values in the bitmap
289 Gt(0),
290 // we have some in the fallback
291 Gt(0)));
292
293 TestRoundTrip(values, __LINE__);
294 }
295
TEST(ValidateEnumTest,GenerateEnumDataBitmap)296 TEST(ValidateEnumTest, GenerateEnumDataBitmap) {
297 EXPECT_THAT(GenerateEnumData({0, 1, 2, 4, 8, 16, 32}),
298 ElementsAre(Make32(0, 3), Make32(32, 0),
299 0b100000000000000010000000100010));
300 TestRoundTrip({}, __LINE__);
301 TestRoundTrip({0, 1, 2, 4, 8, 16}, __LINE__);
302 TestRoundTrip({0, 1, 2, 4, 8, 16, 32, 64, 128, 256}, __LINE__);
303 TestRoundTrip({10000, 10001, 10002, 10004, 10006, 10008, 10010}, __LINE__);
304 TestRoundTrip({std::numeric_limits<int32_t>::min(), -123123, -123, 213,
305 213213, std::numeric_limits<int32_t>::max()},
306 __LINE__);
307 }
308
TEST(ValidateEnumTest,GenerateEnumDataBitmapWithOverflow)309 TEST(ValidateEnumTest, GenerateEnumDataBitmapWithOverflow) {
310 std::vector<int32_t> values;
311 // We step by 10 to guarantee each new value is more cost effective to add to
312 // the bitmap, which would cause an overflow of the 16-bit bitmap size if we
313 // didn't prevent it in the generator.
314 for (int32_t i = -33000; i < 33000; i += 10) {
315 values.push_back(i);
316 }
317 const auto data = GenerateEnumData(values);
318
319 EXPECT_THAT(ExtractHeader(data),
320 HeaderHas(_, _,
321 // we reached the maximum size for the bitmap.
322 0x10000 - 32,
323 // we have some in the fallback
324 Gt(0)));
325
326 TestRoundTrip(values, __LINE__);
327 }
328
TEST(ValidateEnumTest,GenerateEnumDataWithOverflowOnBoth)329 TEST(ValidateEnumTest, GenerateEnumDataWithOverflowOnBoth) {
330 std::vector<int32_t> values;
331 for (int32_t i = -33000; i < 100000; ++i) {
332 values.push_back(i);
333 }
334 const auto data = GenerateEnumData(values);
335
336 EXPECT_THAT(ExtractHeader(data),
337 HeaderHas(
338 // The sequence starts at the minimum possible value,
339 std::numeric_limits<int16_t>::min(),
340 // and it is as long as possible.
341 0xFFFF,
342 // we reached the maximum size for the bitmap.
343 0x10000 - 32,
344 // we have some in the fallback
345 Gt(0)));
346
347 TestRoundTrip(values, __LINE__);
348 }
349
350
351
352 } // namespace
353 } // namespace internal
354 } // namespace protobuf
355 } // namespace google
356
357 #include "google/protobuf/port_undef.inc"
358