• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Protocol Buffers - Google's data interchange format
2 // Copyright 2023 Google Inc.  All rights reserved.
3 //
4 // Use of this source code is governed by a BSD-style
5 // license that can be found in the LICENSE file or at
6 // https://developers.google.com/open-source/licenses/bsd
7 
8 #include "google/protobuf/generated_enum_util.h"
9 
10 #include <algorithm>
11 #include <array>
12 #include <cstddef>
13 #include <cstdint>
14 #include <limits>
15 #include <ostream>
16 #include <string>
17 #include <vector>
18 
19 #include <gmock/gmock.h>
20 #include <gtest/gtest.h>
21 #include "absl/container/btree_set.h"
22 #include "absl/strings/str_format.h"
23 #include "absl/types/span.h"
24 #include "google/protobuf/descriptor.h"
25 
26 
27 // Must be included last.
28 #include "google/protobuf/port_def.inc"
29 
30 using testing::_;
31 using testing::ElementsAre;
32 using testing::Gt;
33 using testing::IsEmpty;
34 using testing::SizeIs;
35 
36 namespace google {
37 namespace protobuf {
38 namespace internal {
39 namespace {
40 
TEST(GenerateEnumDataTest,DebugChecks)41 TEST(GenerateEnumDataTest, DebugChecks) {
42 #if GTEST_HAS_DEATH_TEST
43   // Not unique
44   EXPECT_DEBUG_DEATH(GenerateEnumData({1, 1}), "sorted_and_unique");
45   // Not sorted
46   EXPECT_DEBUG_DEATH(GenerateEnumData({2, 1}), "sorted_and_unique");
47 #endif
48 }
49 
Make32(uint16_t a,uint16_t b)50 uint32_t Make32(uint16_t a, uint16_t b) { return a | (b << 16); }
Unmake32(uint32_t v)51 std::array<uint16_t, 2> Unmake32(uint32_t v) {
52   return {static_cast<uint16_t>(v), static_cast<uint16_t>(v >> 16)};
53 }
54 
55 struct Header {
56   int16_t sequence_start;
57   uint16_t sequence_length;
58   uint16_t bitmap_length;
59   uint16_t ordered_length;
60 
ToStringgoogle::protobuf::internal::__anonf1fab69a0111::Header61   std::string ToString() const {
62     return absl::StrFormat("(%d,%d,%d,%d)", sequence_start, sequence_length,
63                            bitmap_length, ordered_length);
64   }
operator <<(std::ostream & os,Header header)65   friend std::ostream& operator<<(std::ostream& os, Header header) {
66     return os << header.ToString();
67   }
68 };
69 
70 MATCHER_P4(HeaderHas, sequence_start, sequence_length, bitmap_length,
71            ordered_length, "") {
72   return testing::ExplainMatchResult(sequence_start, arg.sequence_start,
73                                      result_listener) &&
74          testing::ExplainMatchResult(sequence_length, arg.sequence_length,
75                                      result_listener) &&
76          testing::ExplainMatchResult(bitmap_length, arg.bitmap_length,
77                                      result_listener) &&
78          testing::ExplainMatchResult(ordered_length, arg.ordered_length,
79                                      result_listener);
80 }
81 
ExtractHeader(absl::Span<const uint32_t> data)82 Header ExtractHeader(absl::Span<const uint32_t> data) {
83   return {
84       static_cast<int16_t>(Unmake32(data[0])[0]),
85       Unmake32(data[0])[1],
86       Unmake32(data[1])[0],
87       Unmake32(data[1])[1],
88   };
89 }
90 
TEST(GenerateEnumDataTest,BitmapSpaceOptimizationWorks)91 TEST(GenerateEnumDataTest, BitmapSpaceOptimizationWorks) {
92   std::vector<int32_t> values = {0};
93 
94   auto encoded = GenerateEnumData(values);
95   EXPECT_THAT(ExtractHeader(encoded), HeaderHas(0, 1, 0, 0));
96   EXPECT_THAT(encoded, SizeIs(2));
97 
98   // Adding one large value puts it on the fallback.
99   values.push_back(100);
100   encoded = GenerateEnumData(values);
101   EXPECT_THAT(ExtractHeader(encoded), HeaderHas(0, 1, 0, 1));
102   EXPECT_THAT(encoded, SizeIs(3));
103 
104   // Adding a second one still prefers the fallback.
105   values.push_back(101);
106   encoded = GenerateEnumData(values);
107   EXPECT_THAT(ExtractHeader(encoded), HeaderHas(0, 1, 0, 2));
108   EXPECT_THAT(encoded, SizeIs(4));
109 
110   // Adding two more now makes bitmap more efficient, so they are collapsed
111   // to it. Because we can fit the bitmap in 128 bits, which is the same as the
112   // ints.
113   values.push_back(102);
114   values.push_back(103);
115   encoded = GenerateEnumData(values);
116   EXPECT_THAT(ExtractHeader(encoded), HeaderHas(0, 1, 128, 0));
117   EXPECT_THAT(encoded, SizeIs(6));
118 
119   // Add one value that falls into the existing bitmap, nothing changes.
120   values.push_back(104);
121   encoded = GenerateEnumData(values);
122   EXPECT_THAT(ExtractHeader(encoded), HeaderHas(0, 1, 128, 0));
123   EXPECT_THAT(encoded, SizeIs(6));
124 
125   // Add one value that is in the next 32 bits. It should grow the bitmap.
126   values.push_back(130);
127   encoded = GenerateEnumData(values);
128   EXPECT_THAT(ExtractHeader(encoded), HeaderHas(0, 1, 160, 0));
129   EXPECT_THAT(encoded, SizeIs(7));
130 
131   // Add one value far away, it should go into fallback.
132   values.push_back(200);
133   encoded = GenerateEnumData(values);
134   EXPECT_THAT(ExtractHeader(encoded), HeaderHas(0, 1, 160, 1));
135   EXPECT_THAT(encoded, SizeIs(8));
136 
137   // Another in the next 32-bit block will still make them fallback.
138   values.push_back(230);
139   encoded = GenerateEnumData(values);
140   EXPECT_THAT(ExtractHeader(encoded), HeaderHas(0, 1, 160, 2));
141   EXPECT_THAT(encoded, SizeIs(9));
142 
143   // One more in that same block should collapse them to bitmap.
144   values.push_back(231);
145   encoded = GenerateEnumData(values);
146   EXPECT_THAT(ExtractHeader(encoded), HeaderHas(0, 1, 256, 0));
147   EXPECT_THAT(encoded, SizeIs(10));
148 }
149 
GatherValidValues(absl::Span<const uint32_t> data,int32_t min,int32_t max,absl::btree_set<int32_t> & out)150 void GatherValidValues(absl::Span<const uint32_t> data, int32_t min,
151                        int32_t max, absl::btree_set<int32_t>& out) {
152   if (min >= max) return;
153   for (int32_t i = min;; ++i) {
154     if (ValidateEnum(i, data.begin())) out.insert(i);
155     // We check the top limit before ++i to avoid overflows
156     if (i == max) break;
157   }
158 }
159 
GetValidValues(absl::Span<const uint32_t> data,int32_t min,int32_t max)160 std::vector<int32_t> GetValidValues(absl::Span<const uint32_t> data,
161                                     int32_t min, int32_t max) {
162   // Btree to keep them sorted. Makes testing easier.
163   absl::btree_set<int32_t> s;
164   GatherValidValues(data, min, max, s);
165   return std::vector<int32_t>(s.begin(), s.end());
166 }
167 
TEST(ValidateEnumTest,SequentialRangeTest)168 TEST(ValidateEnumTest, SequentialRangeTest) {
169   EXPECT_THAT(GetValidValues({0, 0}, -100, 100), ElementsAre());
170   EXPECT_THAT(GetValidValues(
171                   {// sequence start=3, length=3
172                    Make32(5, 3),
173                    // no bitmap, no fallback
174                    Make32(0, 0)},
175                   -100, 100),
176               ElementsAre(5, 6, 7));
177   EXPECT_THAT(GetValidValues(
178                   {// sequence start=-2, length=10
179                    Make32(-2, 10),
180                    // no bitmap, no fallback
181                    Make32(0, 0)},
182                   -100, 100),
183               ElementsAre(-2, -1, 0, 1, 2, 3, 4, 5, 6, 7));
184 }
185 
TEST(ValidateEnumTest,BitmapRangeTest)186 TEST(ValidateEnumTest, BitmapRangeTest) {
187   EXPECT_THAT(GetValidValues(
188                   {// no sequence
189                    Make32(0, 0),
190                    // bitmap of 32 bits, no fallback
191                    Make32(32, 0),
192                    // bitmap
193                    0b10011010101},
194                   -100, 100),
195               ElementsAre(0, 2, 4, 6, 7, 10));
196   EXPECT_THAT(GetValidValues(
197                   {// no sequence
198                    Make32(0, 0),
199                    // bitmap of 64 bits, no fallback
200                    Make32(64, 0),
201                    // bitmap
202                    (1 << 4) | (1 << 21), 1 << 6},
203                   -100, 100),
204               ElementsAre(4, 21, 32 + 6));
205 }
206 
TEST(ValidateEnumTest,GenerateEnumDataSequential)207 TEST(ValidateEnumTest, GenerateEnumDataSequential) {
208   EXPECT_THAT(GenerateEnumData({0, 1, 2, 3}), ElementsAre(
209                                                   // sequence start=0, length=4
210                                                   Make32(0, 4),
211                                                   // no bitmap, no fallback.
212                                                   Make32(0, 0)));
213   EXPECT_THAT(GenerateEnumData({-2, -1, 0, 1, 2, 3}),
214               ElementsAre(
215                   // sequence start=-2, length=6
216                   Make32(-2, 6),
217                   // no bitmap, no fallback.
218                   Make32(0, 0)));
219 }
220 
TestRoundTrip(absl::Span<const int32_t> values,int line)221 void TestRoundTrip(absl::Span<const int32_t> values, int line) {
222   auto encoded = GenerateEnumData(values);
223 
224   absl::btree_set<int32_t> s;
225 
226   // We test that all elements in `values` exist in the encoded data, and also
227   // test a range of other values to verify that they do not exist in the
228   // encoded data.
229 
230   // We keep track of the max seen to avoid testing the same values many times.
231   int64_t max_seen = std::numeric_limits<int64_t>::min();
232   const auto gather_valid_values_around = [&](int32_t v) {
233     int32_t min = std::max({
234         static_cast<int64_t>(v) - 100,
235         static_cast<int64_t>(std::numeric_limits<int32_t>::min()),
236         max_seen,
237     });
238     int32_t max =
239         std::min(static_cast<int64_t>(v) + 100,
240                  static_cast<int64_t>(std::numeric_limits<int32_t>::max()));
241     max_seen = std::max(max_seen, int64_t{max});
242     GatherValidValues(encoded, min, max, s);
243   };
244 
245   // We look at a few values around the expected ones.
246   // We could in theory test the whole int32_t domain, but that takes too long
247   // to run.
248   for (int32_t v : values) {
249     gather_valid_values_around(v);
250   }
251   // Also gather some around 0, just to add more coverage, specially when
252   // `values` is empty.
253   gather_valid_values_around(0);
254 
255   // Skip the checks below if we are correct because they are expensive.
256   if (std::equal(s.begin(), s.end(), values.begin(), values.end())) return;
257 
258   std::vector<int32_t> false_negatives;
259   for (int32_t v : values) {
260     if (!ValidateEnum(v, encoded.data())) false_negatives.push_back(v);
261     s.erase(v);
262   }
263   const auto& false_positives = s;
264   const auto print_data = [&] {
265     auto header = ExtractHeader(encoded);
266     return absl::StrFormat("line=%d header=%s", line, header.ToString());
267   };
268   EXPECT_THAT(false_negatives, IsEmpty())
269       << "Missing values from the input. " << print_data()
270       << "\nEncoded: " << testing::PrintToString(encoded);
271   EXPECT_THAT(false_positives, IsEmpty())
272       << "Found values not in input. " << print_data()
273       << "\nEncoded: " << testing::PrintToString(encoded);
274 }
275 
TEST(ValidateEnumTest,GenerateEnumDataSequentialWithOverflow)276 TEST(ValidateEnumTest, GenerateEnumDataSequentialWithOverflow) {
277   std::vector<int32_t> values;
278   for (int32_t i = -33000; i < 33000; ++i) {
279     values.push_back(i);
280   }
281   const auto data = GenerateEnumData(values);
282   EXPECT_THAT(ExtractHeader(data),
283               HeaderHas(
284                   // The sequence starts at the minimum possible value,
285                   std::numeric_limits<int16_t>::min(),
286                   // and it is as long as possible.
287                   0xFFFF,
288                   // we have some values in the bitmap
289                   Gt(0),
290                   // we have some in the fallback
291                   Gt(0)));
292 
293   TestRoundTrip(values, __LINE__);
294 }
295 
TEST(ValidateEnumTest,GenerateEnumDataBitmap)296 TEST(ValidateEnumTest, GenerateEnumDataBitmap) {
297   EXPECT_THAT(GenerateEnumData({0, 1, 2, 4, 8, 16, 32}),
298               ElementsAre(Make32(0, 3), Make32(32, 0),
299                           0b100000000000000010000000100010));
300   TestRoundTrip({}, __LINE__);
301   TestRoundTrip({0, 1, 2, 4, 8, 16}, __LINE__);
302   TestRoundTrip({0, 1, 2, 4, 8, 16, 32, 64, 128, 256}, __LINE__);
303   TestRoundTrip({10000, 10001, 10002, 10004, 10006, 10008, 10010}, __LINE__);
304   TestRoundTrip({std::numeric_limits<int32_t>::min(), -123123, -123, 213,
305                  213213, std::numeric_limits<int32_t>::max()},
306                 __LINE__);
307 }
308 
TEST(ValidateEnumTest,GenerateEnumDataBitmapWithOverflow)309 TEST(ValidateEnumTest, GenerateEnumDataBitmapWithOverflow) {
310   std::vector<int32_t> values;
311   // We step by 10 to guarantee each new value is more cost effective to add to
312   // the bitmap, which would cause an overflow of the 16-bit bitmap size if we
313   // didn't prevent it in the generator.
314   for (int32_t i = -33000; i < 33000; i += 10) {
315     values.push_back(i);
316   }
317   const auto data = GenerateEnumData(values);
318 
319   EXPECT_THAT(ExtractHeader(data),
320               HeaderHas(_, _,
321                         // we reached the maximum size for the bitmap.
322                         0x10000 - 32,
323                         // we have some in the fallback
324                         Gt(0)));
325 
326   TestRoundTrip(values, __LINE__);
327 }
328 
TEST(ValidateEnumTest,GenerateEnumDataWithOverflowOnBoth)329 TEST(ValidateEnumTest, GenerateEnumDataWithOverflowOnBoth) {
330   std::vector<int32_t> values;
331   for (int32_t i = -33000; i < 100000; ++i) {
332     values.push_back(i);
333   }
334   const auto data = GenerateEnumData(values);
335 
336   EXPECT_THAT(ExtractHeader(data),
337               HeaderHas(
338                   // The sequence starts at the minimum possible value,
339                   std::numeric_limits<int16_t>::min(),
340                   // and it is as long as possible.
341                   0xFFFF,
342                   // we reached the maximum size for the bitmap.
343                   0x10000 - 32,
344                   // we have some in the fallback
345                   Gt(0)));
346 
347   TestRoundTrip(values, __LINE__);
348 }
349 
350 
351 
352 }  // namespace
353 }  // namespace internal
354 }  // namespace protobuf
355 }  // namespace google
356 
357 #include "google/protobuf/port_undef.inc"
358