1 // Copyright 2020 The Pigweed Authors
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License"); you may not
4 // use this file except in compliance with the License. You may obtain a copy of
5 // the License at
6 //
7 // https://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
11 // WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
12 // License for the specific language governing permissions and limitations under
13 // the License.
14 #pragma once
15
16 #include <stddef.h>
17 #include <stdint.h>
18
19 #include "pw_preprocessor/compiler.h"
20
21 #ifdef __cplusplus
22 extern "C" {
23 #endif
24
25 // Expose a subset of the varint API for use in C code.
26
27 typedef enum {
28 PW_VARINT_ZERO_TERMINATED_LEAST_SIGNIFICANT = 0b00,
29 PW_VARINT_ZERO_TERMINATED_MOST_SIGNIFICANT = 0b01,
30 PW_VARINT_ONE_TERMINATED_LEAST_SIGNIFICANT = 0b10,
31 PW_VARINT_ONE_TERMINATED_MOST_SIGNIFICANT = 0b11,
32 } pw_varint_Format;
33
34 size_t pw_varint_EncodeCustom(uint64_t integer,
35 void* output,
36 size_t output_size,
37 pw_varint_Format format);
38 size_t pw_varint_DecodeCustom(const void* input,
39 size_t input_size,
40 uint64_t* output,
41 pw_varint_Format format);
42
pw_varint_Encode(uint64_t integer,void * output,size_t output_size)43 static inline size_t pw_varint_Encode(uint64_t integer,
44 void* output,
45 size_t output_size) {
46 return pw_varint_EncodeCustom(
47 integer, output, output_size, PW_VARINT_ZERO_TERMINATED_MOST_SIGNIFICANT);
48 }
49
50 size_t pw_varint_ZigZagEncode(int64_t integer,
51 void* output,
52 size_t output_size);
53
pw_varint_Decode(const void * input,size_t input_size,uint64_t * output)54 static inline size_t pw_varint_Decode(const void* input,
55 size_t input_size,
56 uint64_t* output) {
57 return pw_varint_DecodeCustom(
58 input, input_size, output, PW_VARINT_ZERO_TERMINATED_MOST_SIGNIFICANT);
59 }
60
61 size_t pw_varint_ZigZagDecode(const void* input,
62 size_t input_size,
63 int64_t* output);
64
65 // Returns the size of an when encoded as a varint.
66 size_t pw_varint_EncodedSize(uint64_t integer);
67 size_t pw_varint_ZigZagEncodedSize(int64_t integer);
68
69 #ifdef __cplusplus
70
71 } // extern "C"
72
73 #include <span>
74 #include <type_traits>
75
76 #include "pw_polyfill/language_feature_macros.h"
77
78 namespace pw {
79 namespace varint {
80
81 // The maximum number of bytes occupied by an encoded varint.
82 PW_INLINE_VARIABLE constexpr size_t kMaxVarint32SizeBytes = 5;
83 PW_INLINE_VARIABLE constexpr size_t kMaxVarint64SizeBytes = 10;
84
85 // ZigZag encodes a signed integer. This maps small negative numbers to small,
86 // unsigned positive numbers, which improves their density for LEB128 encoding.
87 //
88 // ZigZag encoding works by moving the sign bit from the most-significant bit to
89 // the least-significant bit. For the signed k-bit integer n, the formula is
90 //
91 // (n << 1) ^ (n >> (k - 1))
92 //
93 // See the following for a description of ZigZag encoding:
94 // https://developers.google.com/protocol-buffers/docs/encoding#types
95 template <typename T>
ZigZagEncode(T n)96 constexpr std::make_unsigned_t<T> ZigZagEncode(T n) {
97 static_assert(std::is_signed<T>(), "Zig-zag encoding is for signed integers");
98 using U = std::make_unsigned_t<T>;
99 return (static_cast<U>(n) << 1) ^ static_cast<U>(n >> (sizeof(T) * 8 - 1));
100 }
101
102 // ZigZag decodes a signed integer.
103 // The calculation is done modulo std::numeric_limits<T>::max()+1, so the
104 // unsigned integer overflows are intentional.
105 template <typename T>
ZigZagDecode(T n)106 constexpr std::make_signed_t<T> ZigZagDecode(T n)
107 PW_NO_SANITIZE("unsigned-integer-overflow") {
108 static_assert(std::is_unsigned<T>(),
109 "Zig-zag decoding is for unsigned integers");
110 return static_cast<std::make_signed_t<T>>((n >> 1) ^ (~(n & 1) + 1));
111 }
112
113 // Encodes a uint64_t with Little-Endian Base 128 (LEB128) encoding.
EncodeLittleEndianBase128(uint64_t integer,const std::span<std::byte> & output)114 inline size_t EncodeLittleEndianBase128(uint64_t integer,
115 const std::span<std::byte>& output) {
116 return pw_varint_Encode(integer, output.data(), output.size());
117 }
118
119 // Encodes the provided integer using a variable-length encoding and returns the
120 // number of bytes written.
121 //
122 // The encoding is the same as used in protocol buffers. Signed integers are
123 // ZigZag encoded to remove leading 1s from small negative numbers, then the
124 // resulting number is encoded as Little Endian Base 128 (LEB128). Unsigned
125 // integers are encoded directly as LEB128.
126 //
127 // Returns the number of bytes written or 0 if the result didn't fit in the
128 // encoding buffer.
129 template <typename T>
Encode(T integer,const std::span<std::byte> & output)130 size_t Encode(T integer, const std::span<std::byte>& output) {
131 if (std::is_signed<T>()) {
132 return pw_varint_ZigZagEncode(integer, output.data(), output.size());
133 } else {
134 return pw_varint_Encode(integer, output.data(), output.size());
135 }
136 }
137
138 // Decodes a varint-encoded value. If reading into a signed integer, the value
139 // is ZigZag decoded.
140 //
141 // Returns the number of bytes read from the input if successful. Returns zero
142 // if the result does not fit in a int64_t / uint64_t or if the input is
143 // exhausted before the number terminates. Reads a maximum of 10 bytes.
144 //
145 // The following example decodes multiple varints from a buffer:
146 //
147 // while (!data.empty()) {
148 // int64_t value;
149 // size_t bytes = Decode(data, &value);
150 //
151 // if (bytes == 0u) {
152 // return Status::DataLoss();
153 // }
154 // results.push_back(value);
155 // data = data.subspan(bytes)
156 // }
157 //
Decode(const std::span<const std::byte> & input,int64_t * value)158 inline size_t Decode(const std::span<const std::byte>& input, int64_t* value) {
159 return pw_varint_ZigZagDecode(input.data(), input.size(), value);
160 }
161
Decode(const std::span<const std::byte> & input,uint64_t * value)162 inline size_t Decode(const std::span<const std::byte>& input, uint64_t* value) {
163 return pw_varint_Decode(input.data(), input.size(), value);
164 }
165
166 enum class Format {
167 kZeroTerminatedLeastSignificant = PW_VARINT_ZERO_TERMINATED_LEAST_SIGNIFICANT,
168 kZeroTerminatedMostSignificant = PW_VARINT_ZERO_TERMINATED_MOST_SIGNIFICANT,
169 kOneTerminatedLeastSignificant = PW_VARINT_ONE_TERMINATED_LEAST_SIGNIFICANT,
170 kOneTerminatedMostSignificant = PW_VARINT_ONE_TERMINATED_MOST_SIGNIFICANT,
171 };
172
173 // Encodes a varint in a custom format.
Encode(uint64_t value,std::span<std::byte> output,Format format)174 inline size_t Encode(uint64_t value,
175 std::span<std::byte> output,
176 Format format) {
177 return pw_varint_EncodeCustom(value,
178 output.data(),
179 output.size(),
180 static_cast<pw_varint_Format>(format));
181 }
182
183 // Decodes a varint from a custom format.
Decode(std::span<const std::byte> input,uint64_t * value,Format format)184 inline size_t Decode(std::span<const std::byte> input,
185 uint64_t* value,
186 Format format) {
187 return pw_varint_DecodeCustom(
188 input.data(), input.size(), value, static_cast<pw_varint_Format>(format));
189 }
190
191 // Returns a size of an integer when encoded as a varint.
EncodedSize(uint64_t integer)192 constexpr size_t EncodedSize(uint64_t integer) {
193 return integer == 0 ? 1 : (64 - __builtin_clzll(integer) + 6) / 7;
194 }
195
196 // Returns a size of an signed integer when ZigZag encoded as a varint.
ZigZagEncodedSize(int64_t integer)197 constexpr size_t ZigZagEncodedSize(int64_t integer) {
198 return EncodedSize(ZigZagEncode(integer));
199 }
200
201 // Returns the maximum integer value that can be encoded in a varint of the
202 // specified number of bytes.
203 //
204 // These values are also listed in the table below. Zigzag encoding cuts these
205 // in half, as positive and negative integers are alternated.
206 //
207 // Bytes Max value
208 // 1 127
209 // 2 16,383
210 // 3 2,097,151
211 // 4 268,435,455
212 // 5 34,359,738,367 -- needed for max uint32 value
213 // 6 4,398,046,511,103
214 // 7 562,949,953,421,311
215 // 8 72,057,594,037,927,935
216 // 9 9,223,372,036,854,775,807
217 // 10 uint64 max value
218 //
MaxValueInBytes(size_t bytes)219 constexpr uint64_t MaxValueInBytes(size_t bytes) {
220 return bytes >= kMaxVarint64SizeBytes ? std::numeric_limits<uint64_t>::max()
221 : (uint64_t(1) << (7 * bytes)) - 1;
222 }
223
224 } // namespace varint
225 } // namespace pw
226
227 #endif // __cplusplus
228