1 /*
2 * Copyright (C) 2017 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #ifndef INCLUDE_PERFETTO_PROTOZERO_PROTO_UTILS_H_
18 #define INCLUDE_PERFETTO_PROTOZERO_PROTO_UTILS_H_
19
20 #include <inttypes.h>
21 #include <stddef.h>
22
23 #include <type_traits>
24
25 #include "perfetto/base/logging.h"
26 #include "perfetto/base/utils.h"
27
28 namespace protozero {
29 namespace proto_utils {
30
// Wire types as defined by the protobuf encoding spec, see
// https://developers.google.com/protocol-buffers/docs/encoding .
// The wire type is stored in the low 3 bits of every field tag and carries
// just enough information for a reader to work out how long the value that
// follows is.
enum class ProtoWireType : uint32_t {
  kVarInt = 0,           // Variable-length integer.
  kFixed64 = 1,          // 8-byte value.
  kLengthDelimited = 2,  // Length prefix followed by that many bytes.
  kFixed32 = 5,          // 4-byte value.
};
40
// The type each field is declared with in the .proto schema. It drives the
// choice of translation strategy when the trace is written.
// The numeric values are spelled out so that they cannot shift silently if
// enumerators are ever reordered.
// NOTE(review): the numbering appears to mirror
// google.protobuf.FieldDescriptorProto.Type - confirm before relying on it.
enum class ProtoSchemaType {
  kUnknown = 0,
  kDouble = 1,
  kFloat = 2,
  kInt64 = 3,
  kUint64 = 4,
  kInt32 = 5,
  kFixed64 = 6,
  kFixed32 = 7,
  kBool = 8,
  kString = 9,
  kGroup = 10,  // Deprecated (proto2 only)
  kMessage = 11,
  kBytes = 12,
  kUint32 = 13,
  kEnum = 14,
  kSfixed32 = 15,
  kSfixed64 = 16,
  kSint32 = 17,
  kSint64 = 18,
};
64
ProtoSchemaToString(ProtoSchemaType v)65 inline const char* ProtoSchemaToString(ProtoSchemaType v) {
66 switch (v) {
67 case ProtoSchemaType::kUnknown:
68 return "unknown";
69 case ProtoSchemaType::kDouble:
70 return "double";
71 case ProtoSchemaType::kFloat:
72 return "float";
73 case ProtoSchemaType::kInt64:
74 return "int64";
75 case ProtoSchemaType::kUint64:
76 return "uint64";
77 case ProtoSchemaType::kInt32:
78 return "int32";
79 case ProtoSchemaType::kFixed64:
80 return "fixed64";
81 case ProtoSchemaType::kFixed32:
82 return "fixed32";
83 case ProtoSchemaType::kBool:
84 return "bool";
85 case ProtoSchemaType::kString:
86 return "string";
87 case ProtoSchemaType::kGroup:
88 return "group";
89 case ProtoSchemaType::kMessage:
90 return "message";
91 case ProtoSchemaType::kBytes:
92 return "bytes";
93 case ProtoSchemaType::kUint32:
94 return "uint32";
95 case ProtoSchemaType::kEnum:
96 return "enum";
97 case ProtoSchemaType::kSfixed32:
98 return "sfixed32";
99 case ProtoSchemaType::kSfixed64:
100 return "sfixed64";
101 case ProtoSchemaType::kSint32:
102 return "sint32";
103 case ProtoSchemaType::kSint64:
104 return "sint64";
105 }
106 // For gcc:
107 PERFETTO_DCHECK(false);
108 return "";
109 }
110
// The length field of a nested message is reserved as a fixed-size varint of
// |kMessageLengthFieldSize| bytes. Each varint byte carries 7 payload bits,
// which caps the supported message size at 2^28 - 1 bytes (just under
// 256 MiB).
constexpr size_t kMessageLengthFieldSize = 4;
constexpr size_t kMaxMessageLength =
    (1u << (7 * kMessageLengthFieldSize)) - 1;

// A field tag is encoded as a 32-bit varint (5 bytes at most) and the largest
// simple, i.e. not length-delimited, value is a 64-bit varint (10 bytes at
// most). A 15-byte buffer is therefore always enough to store a simple field.
constexpr size_t kMaxTagEncodedSize = 5;
constexpr size_t kMaxSimpleFieldEncodedSize = 10 + kMaxTagEncodedSize;
120
121 // Proto types: (int|uint|sint)(32|64), bool, enum.
MakeTagVarInt(uint32_t field_id)122 constexpr uint32_t MakeTagVarInt(uint32_t field_id) {
123 return (field_id << 3) | static_cast<uint32_t>(ProtoWireType::kVarInt);
124 }
125
126 // Proto types: fixed64, sfixed64, fixed32, sfixed32, double, float.
127 template <typename T>
MakeTagFixed(uint32_t field_id)128 constexpr uint32_t MakeTagFixed(uint32_t field_id) {
129 static_assert(sizeof(T) == 8 || sizeof(T) == 4, "Value must be 4 or 8 bytes");
130 return (field_id << 3) |
131 static_cast<uint32_t>((sizeof(T) == 8 ? ProtoWireType::kFixed64
132 : ProtoWireType::kFixed32));
133 }
134
135 // Proto types: string, bytes, embedded messages.
MakeTagLengthDelimited(uint32_t field_id)136 constexpr uint32_t MakeTagLengthDelimited(uint32_t field_id) {
137 return (field_id << 3) |
138 static_cast<uint32_t>(ProtoWireType::kLengthDelimited);
139 }
140
// Proto types: sint64, sint32.
// ZigZag-encodes |value|, interleaving negative and non-negative numbers so
// that values of small magnitude map to small unsigned numbers:
//   0 -> 0, -1 -> 1, 1 -> 2, -2 -> 3, 2 -> 4, ...
// Returns the encoded value as the unsigned counterpart of |T|.
template <typename T>
inline typename std::make_unsigned<T>::type ZigZagEncode(T value) {
  using UnsignedType = typename std::make_unsigned<T>::type;

  // Right-shifting a negative value is implementation-defined before C++20.
  // Check at compile time that the compiler performs an arithmetic shift
  // (replicating the sign bit), which the sign mask below depends on.
  static_assert((static_cast<int64_t>(-1) >> 63) == -1,
                "implementation does not support arithmetic right shift");

  // The left shift must happen on the unsigned representation: shifting a
  // negative signed value (or shifting into the sign bit) is undefined
  // behavior, while unsigned shifts simply wrap.
  const UnsignedType shifted =
      static_cast<UnsignedType>(static_cast<UnsignedType>(value) << 1);

  // All ones if |value| is negative, all zeros otherwise.
  const UnsignedType sign_mask =
      static_cast<UnsignedType>(value >> (sizeof(T) * 8 - 1));

  return static_cast<UnsignedType>(shifted ^ sign_mask);
}
147
// Encodes |value| as a varint starting at |target| and returns a pointer to
// the byte right after the last one written. No bounds checking is performed:
// the caller has to provide a large enough buffer (a 64-bit varint takes at
// most 10 bytes).
template <typename T>
inline uint8_t* WriteVarInt(T value, uint8_t* target) {
  // Signed inputs are first sign-extended to int64_t, because "If you use
  // int32 or int64 as the type for a negative number, the resulting varint is
  // always ten bytes long" (developers.google.com/protocol-buffers/docs/encoding).
  // The result is then reinterpreted as unsigned, so that the right shifts in
  // the loop below are logical rather than arithmetic (sign-propagating).
  // The conversion chain for each input type is:
  //   uintX_t -> uintX_t -> uintX_t
  //   int8_t  -> int64_t -> uint64_t
  //   int16_t -> int64_t -> uint64_t
  //   int32_t -> int64_t -> uint64_t
  //   int64_t -> int64_t -> uint64_t
  using Widened =
      typename std::conditional<std::is_unsigned<T>::value, T, int64_t>::type;
  using Payload = typename std::make_unsigned<Widened>::type;

  Payload remaining = static_cast<Payload>(static_cast<Widened>(value));

  // Emit 7 bits at a time, least significant group first. Every byte but the
  // last one has its top (continuation) bit set.
  for (; remaining >= 0x80; remaining >>= 7)
    *target++ = static_cast<uint8_t>(remaining) | 0x80;

  *target++ = static_cast<uint8_t>(remaining);
  return target;
}
176
177 // Writes a fixed-size redundant encoding of the given |value|. This is
178 // used to backfill fixed-size reservations for the length field using a
179 // non-canonical varint encoding (e.g. \x81\x80\x80\x00 instead of \x01).
180 // See https://github.com/google/protobuf/issues/1530.
181 // In particular, this is used for nested messages. The size of a nested message
182 // is not known until all its field have been written. |kMessageLengthFieldSize|
183 // bytes are reserved to encode the size field and backfilled at the end.
WriteRedundantVarInt(uint32_t value,uint8_t * buf)184 inline void WriteRedundantVarInt(uint32_t value, uint8_t* buf) {
185 for (size_t i = 0; i < kMessageLengthFieldSize; ++i) {
186 const uint8_t msb = (i < kMessageLengthFieldSize - 1) ? 0x80 : 0;
187 buf[i] = static_cast<uint8_t>(value) | msb;
188 value >>= 7;
189 }
190 }
191
// Compile-time check that the tag (preamble) of |field_id| fits in one byte.
// A single varint byte has 7 payload bits and the wire type takes the low 3,
// which leaves 4 bits for the field id, hence ids 0..15.
template <uint32_t field_id>
void StaticAssertSingleBytePreamble() {
  constexpr uint32_t kMaxSingleByteFieldId = (1u << 4) - 1;
  static_assert(field_id <= kMaxSingleByteFieldId,
                "Proto field id too big to fit in a single byte preamble");
}
197
// Parses a VarInt from the encoded buffer [start, end). |end| is STL-style and
// points one byte past the end of buffer.
// The parsed int value is stored in the output arg |value|. Returns a pointer
// to the next unconsumed byte (so start < retval <= end) on success.
// On failure returns |start| and sets |value| to 0. Parsing fails if the
// buffer ends before the VarInt is terminated, or if the VarInt is longer than
// the 10 bytes a 64-bit value can take (malformed input).
inline const uint8_t* ParseVarInt(const uint8_t* start,
                                  const uint8_t* end,
                                  uint64_t* value) {
  const uint8_t* pos = start;
  uint64_t parsed = 0;
  // The loop is bounded on |shift| as well as on the buffer: shifting a 64-bit
  // integer by 64 or more bits is undefined behavior, so an over-long encoding
  // has to be rejected rather than decoded.
  for (uint32_t shift = 0; pos < end && shift < 64u; shift += 7) {
    // Read the byte once; it is needed for both the payload and the
    // continuation bit.
    const uint8_t cur_byte = *pos++;
    parsed |= static_cast<uint64_t>(cur_byte & 0x7f) << shift;
    if ((cur_byte & 0x80) == 0) {
      // Continuation bit clear: this was the last byte of the VarInt.
      *value = parsed;
      return pos;
    }
  }
  // Ran out of buffer or of bits before finding the terminating byte.
  *value = 0;
  return start;
}
221
222 } // namespace proto_utils
223 } // namespace protozero
224
225 #endif // INCLUDE_PERFETTO_PROTOZERO_PROTO_UTILS_H_
226