1 /*
2 * Copyright (C) 2017 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #ifndef INCLUDE_PERFETTO_PROTOZERO_PROTO_UTILS_H_
18 #define INCLUDE_PERFETTO_PROTOZERO_PROTO_UTILS_H_
19
20 #include <stddef.h>
21
22 #include <cinttypes>
23 #include <type_traits>
24
25 #include "perfetto/base/logging.h"
26 #include "perfetto/public/pb_utils.h"
27
// Qualifier for functions whose body contains a switch statement.
// Expands to `constexpr` when the compiler advertises C++14-style relaxed
// constexpr (__cpp_constexpr >= 201304), which is what permits such a body in
// a constexpr function. On older compilers it degrades to a plain `inline` so
// that the very same definitions still compile.
#if !defined(__cpp_constexpr) || (__cpp_constexpr < 201304)
#define PERFETTO_PROTOZERO_CONSTEXPR14_OR_INLINE inline
#else
#define PERFETTO_PROTOZERO_CONSTEXPR14_OR_INLINE constexpr
#endif
36
37 namespace protozero {
38 namespace proto_utils {
39
// Wire types of the protobuf binary encoding, see
// https://developers.google.com/protocol-buffers/docs/encoding.
// The wire type is the piece of information encoded into the proto that gives
// a reader just enough knowledge to find the length of the value following it.
enum class ProtoWireType : uint32_t {
  // Variable-length integer.
  kVarInt = 0,
  // Fixed-width 8-byte value.
  kFixed64 = 1,
  // Length-prefixed payload.
  kLengthDelimited = 2,
  // Fixed-width 4-byte value.
  kFixed32 = 5,
};
49
// Type of a field as declared in the .proto schema. This is the information
// used to pick the translation strategy when writing the trace.
// Enumerators are numbered consecutively, starting from kUnknown = 0.
// NOTE(review): the numbering looks like it mirrors
// FieldDescriptorProto.Type from descriptor.proto -- confirm before relying
// on it.
enum class ProtoSchemaType {
  kUnknown = 0,
  kDouble = 1,
  kFloat = 2,
  kInt64 = 3,
  kUint64 = 4,
  kInt32 = 5,
  kFixed64 = 6,
  kFixed32 = 7,
  kBool = 8,
  kString = 9,
  kGroup = 10,  // Deprecated (proto2 only)
  kMessage = 11,
  kBytes = 12,
  kUint32 = 13,
  kEnum = 14,
  kSfixed32 = 15,
  kSfixed64 = 16,
  kSint32 = 17,
  kSint64 = 18,
};
73
ProtoSchemaToString(ProtoSchemaType v)74 inline const char* ProtoSchemaToString(ProtoSchemaType v) {
75 switch (v) {
76 case ProtoSchemaType::kUnknown:
77 return "unknown";
78 case ProtoSchemaType::kDouble:
79 return "double";
80 case ProtoSchemaType::kFloat:
81 return "float";
82 case ProtoSchemaType::kInt64:
83 return "int64";
84 case ProtoSchemaType::kUint64:
85 return "uint64";
86 case ProtoSchemaType::kInt32:
87 return "int32";
88 case ProtoSchemaType::kFixed64:
89 return "fixed64";
90 case ProtoSchemaType::kFixed32:
91 return "fixed32";
92 case ProtoSchemaType::kBool:
93 return "bool";
94 case ProtoSchemaType::kString:
95 return "string";
96 case ProtoSchemaType::kGroup:
97 return "group";
98 case ProtoSchemaType::kMessage:
99 return "message";
100 case ProtoSchemaType::kBytes:
101 return "bytes";
102 case ProtoSchemaType::kUint32:
103 return "uint32";
104 case ProtoSchemaType::kEnum:
105 return "enum";
106 case ProtoSchemaType::kSfixed32:
107 return "sfixed32";
108 case ProtoSchemaType::kSfixed64:
109 return "sfixed64";
110 case ProtoSchemaType::kSint32:
111 return "sint32";
112 case ProtoSchemaType::kSint64:
113 return "sint64";
114 }
115 // For gcc:
116 PERFETTO_DCHECK(false);
117 return "";
118 }
119
// The length of a message is encoded as a varint of 4 bytes. Every varint
// byte carries 7 payload bits, hence the largest supported message size is
// 2^28 - 1 bytes (just under 256 MiB).
constexpr size_t kMessageLengthFieldSize = 4;
constexpr size_t kMaxMessageLength = (1u << (7 * kMessageLengthFieldSize)) - 1;

// Upper bounds for encoded sizes. A field tag is a 32-bit varint (at most 5
// bytes) and the largest simple (i.e. not length-delimited) value is a 64-bit
// varint (at most 10 bytes). A 15 bytes buffer is therefore always enough to
// hold one simple field.
constexpr size_t kMaxTagEncodedSize = 5;
constexpr size_t kMaxSimpleFieldEncodedSize = 10 + kMaxTagEncodedSize;
129
130 // Proto types: (int|uint|sint)(32|64), bool, enum.
MakeTagVarInt(uint32_t field_id)131 constexpr uint32_t MakeTagVarInt(uint32_t field_id) {
132 return (field_id << 3) | static_cast<uint32_t>(ProtoWireType::kVarInt);
133 }
134
135 // Proto types: fixed64, sfixed64, fixed32, sfixed32, double, float.
136 template <typename T>
MakeTagFixed(uint32_t field_id)137 constexpr uint32_t MakeTagFixed(uint32_t field_id) {
138 static_assert(sizeof(T) == 8 || sizeof(T) == 4, "Value must be 4 or 8 bytes");
139 return (field_id << 3) |
140 static_cast<uint32_t>((sizeof(T) == 8 ? ProtoWireType::kFixed64
141 : ProtoWireType::kFixed32));
142 }
143
144 // Proto types: string, bytes, embedded messages.
MakeTagLengthDelimited(uint32_t field_id)145 constexpr uint32_t MakeTagLengthDelimited(uint32_t field_id) {
146 return (field_id << 3) |
147 static_cast<uint32_t>(ProtoWireType::kLengthDelimited);
148 }
149
// ZigZag-encodes a signed integer (proto types: sint64, sint32) into the
// unsigned type of the same width: 0 -> 0, -1 -> 1, 1 -> 2, -2 -> 3, ...
template <typename T>
inline typename std::make_unsigned<T>::type ZigZagEncode(T value) {
  using U = typename std::make_unsigned<T>::type;

  // Right-shifting a negative value is implementation specific. The encoding
  // below relies on the shift being arithmetic: shifting a non-negative value
  // by (bit width - 1) must give an all 0 bitmap and shifting a negative value
  // must give an all 1 bitmap. Verify that at compile time on 64-bit operands.
  static_assert(static_cast<uint64_t>(int64_t{-1} >> 63) == ~uint64_t{0},
                "implementation does not support assumed rightshift");
  static_assert(static_cast<uint64_t>(int64_t{1} >> 63) == uint64_t{0},
                "implementation does not support assumed rightshift");

  // All ones for a negative |value|, all zeros otherwise.
  const U sign_mask = static_cast<U>(value >> (sizeof(T) * 8 - 1));
  // Makes room for the sign in the least significant bit.
  const U doubled = static_cast<U>(static_cast<U>(value) << 1);
  return static_cast<U>(doubled ^ sign_mask);
}
170
// Inverse of the ZigZag encoding (proto types: sint64, sint32): maps
// 0 -> 0, 1 -> -1, 2 -> 1, 3 -> -2, ... The result has the signed type of the
// same width as |T|.
template <typename T>
inline typename std::make_signed<T>::type ZigZagDecode(T value) {
  using U = typename std::make_unsigned<T>::type;
  using S = typename std::make_signed<T>::type;

  const U bits = static_cast<U>(value);
  // The least significant bit carries the sign: expand it into an all 1 bitmap
  // (negative) or an all 0 bitmap (non-negative).
  const U flip = static_cast<U>(U{0} - (bits & U{1}));
  const U magnitude = static_cast<U>(bits >> 1);
  return static_cast<S>(magnitude ^ flip);
}
180
// Converts |value| into the unsigned integer that has to be fed to the varint
// encoder.
// Signed inputs are first sign extended to int64_t (see [1]) and then
// reinterpreted as uint64_t. Unsigned inputs keep their type. The result is
// always unsigned in order to avoid arithmetic (sign expanding) shifts in the
// encoding loop.
// [1]: "If you use int32 or int64 as the type for a negative number, the
// resulting varint is always ten bytes long".
// - developers.google.com/protocol-buffers/docs/encoding
// The casts applied to each input type are:
//   uintX_t -> uintX_t -> uintX_t
//   int8_t  -> int64_t -> uint64_t
//   int16_t -> int64_t -> uint64_t
//   int32_t -> int64_t -> uint64_t
//   int64_t -> int64_t -> uint64_t
template <typename T>
auto ExtendValueForVarIntSerialization(T value) -> typename std::make_unsigned<
    typename std::conditional<std::is_unsigned<T>::value, T, int64_t>::type>::
    type {
  using Widened =
      typename std::conditional<std::is_unsigned<T>::value, T, int64_t>::type;
  using Result = typename std::make_unsigned<Widened>::type;

  // Inner cast: sign extension (no-op for unsigned |T|).
  // Outer cast: reinterpretation as unsigned.
  return static_cast<Result>(static_cast<Widened>(value));
}
206
// Encodes |value| as a varint starting at |target| and returns a pointer to
// the byte right after the last one written. The value is first converted by
// ExtendValueForVarIntSerialization(). At least one byte is always written.
// No bounds check is performed on |target|.
template <typename T>
inline uint8_t* WriteVarInt(T value, uint8_t* target) {
  auto remaining = ExtendValueForVarIntSerialization(value);
  uint8_t* cursor = target;

  // Emit the low 7 bits with the continuation bit (0x80) set for as long as
  // more than 7 significant bits are left.
  for (; remaining >= 0x80; remaining >>= 7) {
    *cursor++ = static_cast<uint8_t>(static_cast<uint8_t>(remaining) | 0x80);
  }

  // Final byte: what is left is < 0x80, so the continuation bit is clear.
  *cursor++ = static_cast<uint8_t>(remaining);
  return cursor;
}
218
219 // Writes a fixed-size redundant encoding of the given |value|. This is
220 // used to backfill fixed-size reservations for the length field using a
221 // non-canonical varint encoding (e.g. \x81\x80\x80\x00 instead of \x01).
222 // See https://github.com/google/protobuf/issues/1530.
223 // This is used mainly in two cases:
224 // 1) At trace writing time, when starting a nested messages. The size of a
225 // nested message is not known until all its field have been written.
226 // |kMessageLengthFieldSize| bytes are reserved to encode the size field and
227 // backfilled at the end.
228 // 2) When rewriting a message at trace filtering time, in protozero/filtering.
229 // At that point we know only the upper bound of the length (a filtered
230 // message is <= the original one) and we backfill after the message has been
231 // filtered.
232 inline void WriteRedundantVarInt(uint32_t value,
233 uint8_t* buf,
234 size_t size = kMessageLengthFieldSize) {
235 for (size_t i = 0; i < size; ++i) {
236 const uint8_t msb = (i < size - 1) ? 0x80 : 0;
237 buf[i] = static_cast<uint8_t>(value) | msb;
238 value >>= 7;
239 }
240 }
241
// Compile-time check that |field_id| is at most 15. Instantiating the
// function with a larger id fails the build; the function does nothing at
// runtime.
template <uint32_t field_id>
void StaticAssertSingleBytePreamble() {
  static_assert(field_id <= 15,
                "Proto field id too big to fit in a single byte preamble");
}
247
248 // Parses a VarInt from the encoded buffer [start, end). |end| is STL-style and
249 // points one byte past the end of buffer.
250 // The parsed int value is stored in the output arg |value|. Returns a pointer
251 // to the next unconsumed byte (so start < retval <= end) or |start| if the
252 // VarInt could not be fully parsed because there was not enough space in the
253 // buffer.
ParseVarInt(const uint8_t * start,const uint8_t * end,uint64_t * out_value)254 inline const uint8_t* ParseVarInt(const uint8_t* start,
255 const uint8_t* end,
256 uint64_t* out_value) {
257 return PerfettoPbParseVarInt(start, end, out_value);
258 }
259
// Tells whether a field is repeated and, if so, whether it is packed.
enum class RepetitionType {
  // Not a repeated field.
  kNotRepeated = 0,
  // Repeated field declared as packed.
  kRepeatedPacked = 1,
  // Repeated field that is not packed.
  kRepeatedNotPacked = 2,
};
265
// Common, empty base of every templated FieldMetadata type. Its sole purpose
// is to make "is this type a FieldMetadata?" a simple base-class check.
struct FieldMetadataBase {
  // Defaulted (not user-provided) so that the type stays trivially
  // constructible and usable in constant expressions.
  constexpr FieldMetadataBase() = default;
};
271
272 template <uint32_t field_id,
273 RepetitionType repetition_type,
274 ProtoSchemaType proto_schema_type,
275 typename CppFieldType,
276 typename MessageType>
277 struct FieldMetadata : public FieldMetadataBase {
278 constexpr FieldMetadata() = default;
279
280 static constexpr int kFieldId = field_id;
281 // Whether this field is repeated, packed (repeated [packed-true]) or not
282 // (optional).
283 static constexpr RepetitionType kRepetitionType = repetition_type;
284 // Proto type of this field (e.g. int64, fixed32 or nested message).
285 static constexpr ProtoSchemaType kProtoFieldType = proto_schema_type;
286 // C++ type of this field (for nested messages - C++ protozero class).
287 using cpp_field_type = CppFieldType;
288 // Protozero message which this field belongs to.
289 using message_type = MessageType;
290 };
291
292 } // namespace proto_utils
293 } // namespace protozero
294
295 #endif // INCLUDE_PERFETTO_PROTOZERO_PROTO_UTILS_H_
296