• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (C) 2017 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #ifndef INCLUDE_PERFETTO_PROTOZERO_PROTO_UTILS_H_
18 #define INCLUDE_PERFETTO_PROTOZERO_PROTO_UTILS_H_
19 
20 #include <stddef.h>
21 
22 #include <cinttypes>
23 #include <type_traits>
24 
25 #include "perfetto/base/logging.h"
26 #include "perfetto/public/pb_utils.h"
27 
// Decl-specifier for functions whose body contains a `switch` statement.
// Expands to `constexpr` when the compiler advertises C++14 relaxed constexpr
// support (__cpp_constexpr >= 201304) and to plain `inline` otherwise, so the
// same function definition is usable with older language modes.
#if defined(__cpp_constexpr) && __cpp_constexpr >= 201304
#define PERFETTO_PROTOZERO_CONSTEXPR14_OR_INLINE constexpr
#else
#define PERFETTO_PROTOZERO_CONSTEXPR14_OR_INLINE inline
#endif
36 
37 namespace protozero {
38 namespace proto_utils {
39 
// Protobuf wire types, see
// https://developers.google.com/protocol-buffers/docs/encoding.
// The wire type is the part of a field tag that tells a parser how to find
// the length of the value that follows, without knowing the field's schema.
// The numeric values are dictated by the wire format and must not change.
enum class ProtoWireType : uint32_t {
  // Variable-length integer.
  kVarInt = 0,
  // Fixed-width 8-byte value.
  kFixed64 = 1,
  // Length-prefixed payload (strings, bytes, embedded messages).
  kLengthDelimited = 2,
  // Fixed-width 4-byte value.
  kFixed32 = 5,
};
49 
// The type a field is declared with in the .proto schema. This information
// is used to decide the translation strategy when writing the trace.
// Enumerators are numbered consecutively starting from kUnknown = 0; the
// values are spelled out so that accidental reordering is easy to spot.
enum class ProtoSchemaType {
  kUnknown = 0,
  kDouble = 1,
  kFloat = 2,
  kInt64 = 3,
  kUint64 = 4,
  kInt32 = 5,
  kFixed64 = 6,
  kFixed32 = 7,
  kBool = 8,
  kString = 9,
  kGroup = 10,  // Deprecated (proto2 only)
  kMessage = 11,
  kBytes = 12,
  kUint32 = 13,
  kEnum = 14,
  kSfixed32 = 15,
  kSfixed64 = 16,
  kSint32 = 17,
  kSint64 = 18,
};
73 
ProtoSchemaToString(ProtoSchemaType v)74 inline const char* ProtoSchemaToString(ProtoSchemaType v) {
75   switch (v) {
76     case ProtoSchemaType::kUnknown:
77       return "unknown";
78     case ProtoSchemaType::kDouble:
79       return "double";
80     case ProtoSchemaType::kFloat:
81       return "float";
82     case ProtoSchemaType::kInt64:
83       return "int64";
84     case ProtoSchemaType::kUint64:
85       return "uint64";
86     case ProtoSchemaType::kInt32:
87       return "int32";
88     case ProtoSchemaType::kFixed64:
89       return "fixed64";
90     case ProtoSchemaType::kFixed32:
91       return "fixed32";
92     case ProtoSchemaType::kBool:
93       return "bool";
94     case ProtoSchemaType::kString:
95       return "string";
96     case ProtoSchemaType::kGroup:
97       return "group";
98     case ProtoSchemaType::kMessage:
99       return "message";
100     case ProtoSchemaType::kBytes:
101       return "bytes";
102     case ProtoSchemaType::kUint32:
103       return "uint32";
104     case ProtoSchemaType::kEnum:
105       return "enum";
106     case ProtoSchemaType::kSfixed32:
107       return "sfixed32";
108     case ProtoSchemaType::kSfixed64:
109       return "sfixed64";
110     case ProtoSchemaType::kSint32:
111       return "sint32";
112     case ProtoSchemaType::kSint64:
113       return "sint64";
114   }
115   // For gcc:
116   PERFETTO_DCHECK(false);
117   return "";
118 }
119 
// Number of bytes used for the (varint-encoded) length field of a message.
// Every varint byte carries 7 payload bits, so 4 bytes give 28 bits: the
// maximum supported message size is 2^28 - 1 bytes, i.e. just under 256 MiB.
constexpr size_t kMessageLengthFieldSize = 4;
constexpr size_t kMaxMessageLength =
    (1u << (7 * kMessageLengthFieldSize)) - 1;
// Largest length that still fits in the 7 payload bits of one varint byte.
constexpr size_t kMaxOneByteMessageLength = 0x7f;
124 
// Upper bounds for the encoded size of a simple (not length-delimited) field:
//  - the field tag is a 32-bit varint, which takes at most 5 bytes;
//  - the largest simple value is a 64-bit varint, which takes at most 10 bytes.
// Hence a 15-byte buffer is always enough to hold one simple field.
constexpr size_t kMaxTagEncodedSize = 5;
constexpr size_t kMaxSimpleFieldEncodedSize = 10 + kMaxTagEncodedSize;
130 
131 // Proto types: (int|uint|sint)(32|64), bool, enum.
MakeTagVarInt(uint32_t field_id)132 constexpr uint32_t MakeTagVarInt(uint32_t field_id) {
133   return (field_id << 3) | static_cast<uint32_t>(ProtoWireType::kVarInt);
134 }
135 
136 // Proto types: fixed64, sfixed64, fixed32, sfixed32, double, float.
137 template <typename T>
MakeTagFixed(uint32_t field_id)138 constexpr uint32_t MakeTagFixed(uint32_t field_id) {
139   static_assert(sizeof(T) == 8 || sizeof(T) == 4, "Value must be 4 or 8 bytes");
140   return (field_id << 3) |
141          static_cast<uint32_t>((sizeof(T) == 8 ? ProtoWireType::kFixed64
142                                                : ProtoWireType::kFixed32));
143 }
144 
145 // Proto types: string, bytes, embedded messages.
MakeTagLengthDelimited(uint32_t field_id)146 constexpr uint32_t MakeTagLengthDelimited(uint32_t field_id) {
147   return (field_id << 3) |
148          static_cast<uint32_t>(ProtoWireType::kLengthDelimited);
149 }
150 
// ZigZag-encodes a signed integer into the unsigned type of the same width,
// mapping 0, -1, 1, -2, ... to 0, 1, 2, 3, ...
// Proto types: sint64, sint32.
template <typename T>
inline typename std::make_unsigned<T>::type ZigZagEncode(T value) {
  using Unsigned = typename std::make_unsigned<T>::type;

  // The encoding relies on right-shifting a negative value being an
  // arithmetic shift, which is implementation specific. Verify at compile
  // time that shifting by (bit width - 1) yields all zeros for a positive
  // value and all ones for a negative value.
  static_assert(static_cast<uint64_t>(int64_t{-1} >> 63) == ~uint64_t{0},
                "implementation does not support assumed rightshift");
  static_assert(static_cast<uint64_t>(int64_t{1} >> 63) == uint64_t{0},
                "implementation does not support assumed rightshift");

  constexpr unsigned kSignBitIndex = sizeof(T) * 8 - 1;

  // |value| multiplied by two, modulo the width of T.
  const Unsigned doubled =
      static_cast<Unsigned>(static_cast<Unsigned>(value) << 1);
  // All ones if |value| is negative, all zeros otherwise.
  const Unsigned sign_fill = static_cast<Unsigned>(value >> kSignBitIndex);

  return static_cast<Unsigned>(doubled ^ sign_fill);
}
171 
// Reverses ZigZag encoding: maps 0, 1, 2, 3, ... back to 0, -1, 1, -2, ...
// Proto types: sint64, sint32.
// T may be signed or unsigned; the result is the signed type of the same
// width.
template <typename T>
inline typename std::make_signed<T>::type ZigZagDecode(T value) {
  using Unsigned = typename std::make_unsigned<T>::type;
  using Signed = typename std::make_signed<T>::type;

  const Unsigned raw = static_cast<Unsigned>(value);
  // The lowest bit carries the sign: when set, every bit of the shifted
  // magnitude has to be flipped.
  const Unsigned sign_fill = ((raw & 1) != 0)
                                 ? static_cast<Unsigned>(~Unsigned{0})
                                 : static_cast<Unsigned>(0);
  const Unsigned magnitude = static_cast<Unsigned>(raw >> 1);
  return static_cast<Signed>(magnitude ^ sign_fill);
}
181 
// Converts |value| into the unsigned integer that is actually fed to the
// varint encoder.
//
// Signed inputs are first sign-extended to int64_t: per the protobuf encoding
// docs, "If you use int32 or int64 as the type for a negative number, the
// resulting varint is always ten bytes long"
// (developers.google.com/protocol-buffers/docs/encoding).
// The result is then always cast to an unsigned type, to avoid arithmetic
// (sign-extending) right shifts in the encoding loop.
//
// Resulting cast chain for each input type:
//   uintX_t -> uintX_t -> uintX_t
//   int8_t  -> int64_t -> uint64_t
//   int16_t -> int64_t -> uint64_t
//   int32_t -> int64_t -> uint64_t
//   int64_t -> int64_t -> uint64_t
template <typename T>
auto ExtendValueForVarIntSerialization(T value) -> typename std::make_unsigned<
    typename std::conditional<std::is_unsigned<T>::value, T, int64_t>::type>::
    type {
  // Unsigned inputs keep their width, everything else is widened to 64 bits.
  using Widened =
      typename std::conditional<std::is_unsigned<T>::value, T, int64_t>::type;
  using Result = typename std::make_unsigned<Widened>::type;

  return static_cast<Result>(static_cast<Widened>(value));
}
207 
// Encodes |value| as a varint starting at |target| and returns a pointer to
// the byte right after the last one written.
// Signed values are sign-extended to 64 bits before encoding (see
// ExtendValueForVarIntSerialization()). No bounds checking is performed: the
// caller must provide enough room for the encoded bytes.
template <typename T>
inline uint8_t* WriteVarInt(T value, uint8_t* target) {
  auto remaining = ExtendValueForVarIntSerialization(value);

  // Emit 7 bits at a time, least significant group first. Every byte except
  // the last one has its top bit set to signal that more bytes follow.
  for (; remaining >= 0x80; remaining >>= 7) {
    *target++ = static_cast<uint8_t>((remaining & 0x7f) | 0x80);
  }

  // Final group: less than 0x80, hence no continuation bit.
  *target++ = static_cast<uint8_t>(remaining);
  return target;
}
219 
220 // Writes a fixed-size redundant encoding of the given |value|. This is
221 // used to backfill fixed-size reservations for the length field using a
222 // non-canonical varint encoding (e.g. \x81\x80\x80\x00 instead of \x01).
223 // See https://github.com/google/protobuf/issues/1530.
224 // This is used mainly in two cases:
225 // 1) At trace writing time, when starting a nested messages. The size of a
226 //    nested message is not known until all its field have been written.
227 //    |kMessageLengthFieldSize| bytes are reserved to encode the size field and
228 //    backfilled at the end.
229 // 2) When rewriting a message at trace filtering time, in protozero/filtering.
230 //    At that point we know only the upper bound of the length (a filtered
231 //    message is <= the original one) and we backfill after the message has been
232 //    filtered.
233 inline void WriteRedundantVarInt(uint32_t value,
234                                  uint8_t* buf,
235                                  size_t size = kMessageLengthFieldSize) {
236   for (size_t i = 0; i < size; ++i) {
237     const uint8_t msb = (i < size - 1) ? 0x80 : 0;
238     buf[i] = static_cast<uint8_t>(value) | msb;
239     value >>= 7;
240   }
241 }
242 
// Compile-time check that |field_id| is small enough for its preamble (the
// field tag) to fit in a single byte. Instantiating this template with a
// field id of 16 or more fails to compile; it has no runtime effect.
template <uint32_t field_id>
void StaticAssertSingleBytePreamble() {
  // Ids up to 15 still fit once shifted left by 3 bits to make room for the
  // wire type: 15 << 3 is below 0x80.
  static_assert(field_id <= 15,
                "Proto field id too big to fit in a single byte preamble");
}
248 
// Parses a VarInt from the encoded buffer [start, end). |end| is STL-style and
// points one byte past the end of buffer.
// The parsed int value is stored in the output arg |out_value|. Returns a
// pointer to the next unconsumed byte (so start < retval <= end) or |start| if
// the VarInt could not be fully parsed because there was not enough space in
// the buffer.
// This is a thin wrapper: all the work is delegated to
// PerfettoPbParseVarInt(), so the contract above is the one that function is
// expected to honor.
inline const uint8_t* ParseVarInt(const uint8_t* start,
                                  const uint8_t* end,
                                  uint64_t* out_value) {
  return PerfettoPbParseVarInt(start, end, out_value);
}
260 
// How a field is declared with respect to repetition. Used as a template
// argument of FieldMetadata.
enum class RepetitionType {
  // Not a repeated field.
  kNotRepeated = 0,
  // Repeated field declared with [packed=true].
  kRepeatedPacked = 1,
  // Repeated field without packing.
  kRepeatedNotPacked = 2,
};
266 
// Provide a common base struct for all templated FieldMetadata types to allow
// simple checks if a given type is a FieldMetadata or not (e.g. by testing
// whether the type derives from this struct).
// The struct is empty: it carries no data and no behavior of its own.
struct FieldMetadataBase {
  // Defaulted constexpr constructor, so that derived metadata types can be
  // created in constant expressions.
  constexpr FieldMetadataBase() = default;
};
272 
273 template <uint32_t field_id,
274           RepetitionType repetition_type,
275           ProtoSchemaType proto_schema_type,
276           typename CppFieldType,
277           typename MessageType>
278 struct FieldMetadata : public FieldMetadataBase {
279   constexpr FieldMetadata() = default;
280 
281   static constexpr int kFieldId = field_id;
282   // Whether this field is repeated, packed (repeated [packed-true]) or not
283   // (optional).
284   static constexpr RepetitionType kRepetitionType = repetition_type;
285   // Proto type of this field (e.g. int64, fixed32 or nested message).
286   static constexpr ProtoSchemaType kProtoFieldType = proto_schema_type;
287   // C++ type of this field (for nested messages - C++ protozero class).
288   using cpp_field_type = CppFieldType;
289   // Protozero message which this field belongs to.
290   using message_type = MessageType;
291 };
292 
293 }  // namespace proto_utils
294 }  // namespace protozero
295 
296 #endif  // INCLUDE_PERFETTO_PROTOZERO_PROTO_UTILS_H_
297