• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright (c) 2009-2024, Google LLC
2 // All rights reserved.
3 //
4 // Use of this source code is governed by a BSD-style
5 // license that can be found in the LICENSE file or at
6 // https://developers.google.com/open-source/licenses/bsd
7 
8 #include "upb_generator/minitable/fasttable.h"
9 
10 #include <algorithm>
11 #include <cstddef>
12 #include <cstdint>
13 #include <cstring>
14 #include <string>
15 #include <utility>
16 #include <vector>
17 
18 #include "absl/strings/substitute.h"
19 #include "upb/base/descriptor_constants.h"
20 #include "upb/mini_table/field.h"
21 #include "upb/mini_table/message.h"
22 #include "upb/reflection/def.hpp"
23 #include "upb/wire/types.h"
24 #include "upb_generator/file_layout.h"
25 
26 // Must be last.
27 #include "upb/port/def.inc"
28 
29 namespace upb {
30 namespace generator {
31 
32 namespace {
33 
34 // Returns fields in order of "hotness", eg. how frequently they appear in
35 // serialized payloads. Ideally this will use a profile. When we don't have
36 // that, we assume that fields with smaller numbers are used more frequently.
FieldHotnessOrder(upb::MessageDefPtr message)37 inline std::vector<upb::FieldDefPtr> FieldHotnessOrder(
38     upb::MessageDefPtr message) {
39   std::vector<upb::FieldDefPtr> fields;
40   size_t field_count = message.field_count();
41   fields.reserve(field_count);
42   for (size_t i = 0; i < field_count; i++) {
43     fields.push_back(message.field(i));
44   }
45   std::sort(fields.begin(), fields.end(),
46             [](upb::FieldDefPtr a, upb::FieldDefPtr b) {
47               return std::make_pair(!a.is_required(), a.number()) <
48                      std::make_pair(!b.is_required(), b.number());
49             });
50   return fields;
51 }
52 
53 typedef std::pair<std::string, uint64_t> TableEntry;
54 
GetWireTypeForField(upb::FieldDefPtr field)55 uint32_t GetWireTypeForField(upb::FieldDefPtr field) {
56   if (field.packed()) return kUpb_WireType_Delimited;
57   switch (field.type()) {
58     case kUpb_FieldType_Double:
59     case kUpb_FieldType_Fixed64:
60     case kUpb_FieldType_SFixed64:
61       return kUpb_WireType_64Bit;
62     case kUpb_FieldType_Float:
63     case kUpb_FieldType_Fixed32:
64     case kUpb_FieldType_SFixed32:
65       return kUpb_WireType_32Bit;
66     case kUpb_FieldType_Int64:
67     case kUpb_FieldType_UInt64:
68     case kUpb_FieldType_Int32:
69     case kUpb_FieldType_Bool:
70     case kUpb_FieldType_UInt32:
71     case kUpb_FieldType_Enum:
72     case kUpb_FieldType_SInt32:
73     case kUpb_FieldType_SInt64:
74       return kUpb_WireType_Varint;
75     case kUpb_FieldType_Group:
76       return kUpb_WireType_StartGroup;
77     case kUpb_FieldType_Message:
78     case kUpb_FieldType_String:
79     case kUpb_FieldType_Bytes:
80       return kUpb_WireType_Delimited;
81   }
82   UPB_UNREACHABLE();
83 }
84 
MakeTag(uint32_t field_number,uint32_t wire_type)85 uint32_t MakeTag(uint32_t field_number, uint32_t wire_type) {
86   return field_number << 3 | wire_type;
87 }
88 
WriteVarint32ToArray(uint64_t val,char * buf)89 size_t WriteVarint32ToArray(uint64_t val, char* buf) {
90   size_t i = 0;
91   do {
92     uint8_t byte = val & 0x7fU;
93     val >>= 7;
94     if (val) byte |= 0x80U;
95     buf[i++] = byte;
96   } while (val);
97   return i;
98 }
99 
GetEncodedTag(upb::FieldDefPtr field)100 uint64_t GetEncodedTag(upb::FieldDefPtr field) {
101   uint32_t wire_type = GetWireTypeForField(field);
102   uint32_t unencoded_tag = MakeTag(field.number(), wire_type);
103   char tag_bytes[10] = {0};
104   WriteVarint32ToArray(unencoded_tag, tag_bytes);
105   uint64_t encoded_tag = 0;
106   memcpy(&encoded_tag, tag_bytes, sizeof(encoded_tag));
107   // TODO: byte-swap for big endian.
108   return encoded_tag;
109 }
110 
GetTableSlot(upb::FieldDefPtr field)111 int GetTableSlot(upb::FieldDefPtr field) {
112   uint64_t tag = GetEncodedTag(field);
113   if (tag > 0x7fff) {
114     // Tag must fit within a two-byte varint.
115     return -1;
116   }
117   return (tag & 0xf8) >> 3;
118 }
119 
TryFillTableEntry(const DefPoolPair & pools,upb::FieldDefPtr field,TableEntry & ent)120 bool TryFillTableEntry(const DefPoolPair& pools, upb::FieldDefPtr field,
121                        TableEntry& ent) {
122   const upb_MiniTable* mt = pools.GetMiniTable64(field.containing_type());
123   const upb_MiniTableField* mt_f =
124       upb_MiniTable_FindFieldByNumber(mt, field.number());
125   std::string type = "";
126   std::string cardinality = "";
127   switch (upb_MiniTableField_Type(mt_f)) {
128     case kUpb_FieldType_Bool:
129       type = "b1";
130       break;
131     case kUpb_FieldType_Enum:
132       if (upb_MiniTableField_IsClosedEnum(mt_f)) {
133         // We don't have the means to test proto2 enum fields for valid values.
134         return false;
135       }
136       [[fallthrough]];
137     case kUpb_FieldType_Int32:
138     case kUpb_FieldType_UInt32:
139       type = "v4";
140       break;
141     case kUpb_FieldType_Int64:
142     case kUpb_FieldType_UInt64:
143       type = "v8";
144       break;
145     case kUpb_FieldType_Fixed32:
146     case kUpb_FieldType_SFixed32:
147     case kUpb_FieldType_Float:
148       type = "f4";
149       break;
150     case kUpb_FieldType_Fixed64:
151     case kUpb_FieldType_SFixed64:
152     case kUpb_FieldType_Double:
153       type = "f8";
154       break;
155     case kUpb_FieldType_SInt32:
156       type = "z4";
157       break;
158     case kUpb_FieldType_SInt64:
159       type = "z8";
160       break;
161     case kUpb_FieldType_String:
162       type = "s";
163       break;
164     case kUpb_FieldType_Bytes:
165       type = "b";
166       break;
167     case kUpb_FieldType_Message:
168       type = "m";
169       break;
170     default:
171       return false;  // Not supported yet.
172   }
173 
174   if (upb_MiniTableField_IsArray(mt_f)) {
175     cardinality = upb_MiniTableField_IsPacked(mt_f) ? "p" : "r";
176   } else if (upb_MiniTableField_IsScalar(mt_f)) {
177     cardinality = upb_MiniTableField_IsInOneof(mt_f) ? "o" : "s";
178   } else {
179     return false;  // Not supported yet (ever?).
180   }
181 
182   uint64_t expected_tag = GetEncodedTag(field);
183 
184   // Data is:
185   //
186   //                  48                32                16                 0
187   // |--------|--------|--------|--------|--------|--------|--------|--------|
188   // |   offset (16)   |case offset (16) |presence| submsg |  exp. tag (16)  |
189   // |--------|--------|--------|--------|--------|--------|--------|--------|
190   //
191   // - |presence| is either hasbit index or field number for oneofs.
192 
193   uint64_t data =
194       static_cast<uint64_t>(mt_f->UPB_PRIVATE(offset)) << 48 | expected_tag;
195 
196   if (field.IsSequence()) {
197     // No hasbit/oneof-related fields.
198   }
199   if (field.real_containing_oneof()) {
200     uint64_t case_offset = ~mt_f->presence;
201     if (case_offset > 0xffff || field.number() > 0xff) return false;
202     data |= field.number() << 24;
203     data |= case_offset << 32;
204   } else {
205     uint64_t hasbit_index = 63;  // No hasbit (set a high, unused bit).
206     if (mt_f->presence) {
207       hasbit_index = mt_f->presence;
208       if (hasbit_index > 31) return false;
209     }
210     data |= hasbit_index << 24;
211   }
212 
213   if (field.ctype() == kUpb_CType_Message) {
214     uint64_t idx = mt_f->UPB_PRIVATE(submsg_index);
215     if (idx > 255) return false;
216     data |= idx << 16;
217 
218     std::string size_ceil = "max";
219     size_t size = SIZE_MAX;
220     if (field.message_type().file() == field.file()) {
221       // We can only be guaranteed the size of the sub-message if it is in the
222       // same file as us.  We could relax this to increase the speed of
223       // cross-file sub-message parsing if we are comfortable requiring that
224       // users compile all messages at the same time.
225       const upb_MiniTable* sub_mt = pools.GetMiniTable64(field.message_type());
226       size = sub_mt->UPB_PRIVATE(size) + 8;
227     }
228     std::vector<size_t> breaks = {64, 128, 192, 256};
229     for (auto brk : breaks) {
230       if (size <= brk) {
231         size_ceil = std::to_string(brk);
232         break;
233       }
234     }
235     ent.first = absl::Substitute("upb_p$0$1_$2bt_max$3b", cardinality, type,
236                                  expected_tag > 0xff ? "2" : "1", size_ceil);
237 
238   } else {
239     ent.first = absl::Substitute("upb_p$0$1_$2bt", cardinality, type,
240                                  expected_tag > 0xff ? "2" : "1");
241   }
242   ent.second = data;
243   return true;
244 }
245 
246 }  // namespace
247 
FastDecodeTable(upb::MessageDefPtr message,const DefPoolPair & pools)248 std::vector<TableEntry> FastDecodeTable(upb::MessageDefPtr message,
249                                         const DefPoolPair& pools) {
250   std::vector<TableEntry> table;
251   for (const auto field : FieldHotnessOrder(message)) {
252     TableEntry ent;
253     int slot = GetTableSlot(field);
254     // std::cerr << "table slot: " << field->number() << ": " << slot << "\n";
255     if (slot < 0) {
256       // Tag can't fit in the table.
257       continue;
258     }
259     if (!TryFillTableEntry(pools, field, ent)) {
260       // Unsupported field type or offset, hasbit index, etc. doesn't fit.
261       continue;
262     }
263     while ((size_t)slot >= table.size()) {
264       size_t size = std::max(static_cast<size_t>(1), table.size() * 2);
265       table.resize(size, TableEntry{"_upb_FastDecoder_DecodeGeneric", 0});
266     }
267     if (table[slot].first != "_upb_FastDecoder_DecodeGeneric") {
268       // A hotter field already filled this slot.
269       continue;
270     }
271     table[slot] = ent;
272   }
273   return table;
274 }
275 
276 }  // namespace generator
277 }  // namespace upb
278