1 // Protocol Buffers - Google's data interchange format
2 // Copyright 2023 Google LLC. All rights reserved.
3 //
4 // Use of this source code is governed by a BSD-style
5 // license that can be found in the LICENSE file or at
6 // https://developers.google.com/open-source/licenses/bsd
7
8 #ifndef UPB_WIRE_EPS_COPY_INPUT_STREAM_H_
9 #define UPB_WIRE_EPS_COPY_INPUT_STREAM_H_
10
#include <limits.h>
#include <string.h>

#include "upb/mem/arena.h"

// Must be last.
#include "upb/port/def.inc"
17
18 #ifdef __cplusplus
19 extern "C" {
20 #endif
21
22 // The maximum number of bytes a single protobuf field can take up in the
23 // wire format. We only want to do one bounds check per field, so the input
24 // stream guarantees that after upb_EpsCopyInputStream_IsDone() is called,
25 // the decoder can read this many bytes without performing another bounds
26 // check. The stream will copy into a patch buffer as necessary to guarantee
27 // this invariant.
28 #define kUpb_EpsCopyInputStream_SlopBytes 16
29
// Sentinel values for upb_EpsCopyInputStream.aliasing. Any other value stored
// in that field is a pointer delta (input buffer address minus patch buffer
// address) that upb_EpsCopyInputStream_GetAliasedPtr() adds to the parsing
// pointer.
enum {
  // Aliasing was not requested at Init() time.
  kUpb_EpsCopyInputStream_NoAliasing = 0,
  // NOTE(review): not referenced anywhere in this header; presumably marks
  // "parsing from the patch buffer" -- confirm against other users.
  kUpb_EpsCopyInputStream_OnPatch = 1,
  // Aliasing is enabled and the parsing pointer already points into the input
  // buffer, so the delta to apply is zero.
  kUpb_EpsCopyInputStream_NoDelta = 2
};
35
// Parsing state for an input buffer. The stream tracks the end of the current
// buffer, the innermost pushed limit, and a small patch buffer that the tail of
// the input is copied into so that reads of up to
// kUpb_EpsCopyInputStream_SlopBytes past the parsing pointer stay in bounds.
typedef struct {
  const char* end;        // Can read up to SlopBytes bytes beyond this.
  const char* limit_ptr;  // For bounds checks, = end + UPB_MIN(limit, 0)
  // One of kUpb_EpsCopyInputStream_NoAliasing / _NoDelta, or the delta from a
  // patch-buffer pointer to the matching input-buffer pointer.
  uintptr_t aliasing;
  int limit;   // Submessage limit relative to end
  bool error;  // To distinguish between EOF and error.
  // Holds a copy of the input tail; the bytes after the copied data are zeroed
  // by Init() / the IsDone fallback.
  char patch[kUpb_EpsCopyInputStream_SlopBytes * 2];
} upb_EpsCopyInputStream;
44
// Returns true if the stream is in the error state. A stream enters the error
// state when the user reads past a limit (caught in IsDone()) or the
// ZeroCopyInputStream returns an error.
//
// This simply reports the `error` flag, which Init() clears and the IsDone
// fallback sets.
UPB_INLINE bool upb_EpsCopyInputStream_IsError(upb_EpsCopyInputStream* e) {
  return e->error;
}
51
// Callback invoked by _upb_EpsCopyInputStream_IsDoneFallbackInline().
//
// After a successful flip into the patch buffer it receives the old parsing
// position (`old_end`) and the equivalent position in the new buffer
// (`new_start`). On error it is called with both pointers NULL. Whatever it
// returns is returned from the fallback as the new parsing pointer.
typedef const char* upb_EpsCopyInputStream_BufferFlipCallback(
    upb_EpsCopyInputStream* e, const char* old_end, const char* new_start);
54
// Slow-path handler used by upb_EpsCopyInputStream_IsDoneWithCallback() when
// the status is kUpb_IsDoneStatus_NeedFallback. It receives the current
// parsing pointer and `overrun` (ptr - e->end) and returns the new parsing
// pointer; a NULL return is reported to the caller as "done".
typedef const char* upb_EpsCopyInputStream_IsDoneFallbackFunc(
    upb_EpsCopyInputStream* e, const char* ptr, int overrun);
57
58 // Initializes a upb_EpsCopyInputStream using the contents of the buffer
59 // [*ptr, size]. Updates `*ptr` as necessary to guarantee that at least
60 // kUpb_EpsCopyInputStream_SlopBytes are available to read.
upb_EpsCopyInputStream_Init(upb_EpsCopyInputStream * e,const char ** ptr,size_t size,bool enable_aliasing)61 UPB_INLINE void upb_EpsCopyInputStream_Init(upb_EpsCopyInputStream* e,
62 const char** ptr, size_t size,
63 bool enable_aliasing) {
64 if (size <= kUpb_EpsCopyInputStream_SlopBytes) {
65 memset(&e->patch, 0, 32);
66 if (size) memcpy(&e->patch, *ptr, size);
67 e->aliasing = enable_aliasing ? (uintptr_t)*ptr - (uintptr_t)e->patch
68 : kUpb_EpsCopyInputStream_NoAliasing;
69 *ptr = e->patch;
70 e->end = *ptr + size;
71 e->limit = 0;
72 } else {
73 e->end = *ptr + size - kUpb_EpsCopyInputStream_SlopBytes;
74 e->limit = kUpb_EpsCopyInputStream_SlopBytes;
75 e->aliasing = enable_aliasing ? kUpb_EpsCopyInputStream_NoDelta
76 : kUpb_EpsCopyInputStream_NoAliasing;
77 }
78 e->limit_ptr = e->end;
79 e->error = false;
80 }
81
// Result of upb_EpsCopyInputStream_IsDoneStatus().
typedef enum {
  // The current stream position is at a limit.
  kUpb_IsDoneStatus_Done,

  // The current stream position is not at a limit.
  kUpb_IsDoneStatus_NotDone,

  // The current stream position is at or past limit_ptr but not exactly at
  // the limit. The fallback must run: it either flips the stream to a new
  // buffer so more data can be read, or flags an error if the position is
  // beyond the limit.
  kUpb_IsDoneStatus_NeedFallback,
} upb_IsDoneStatus;
93
94 // Returns the status of the current stream position. This is a low-level
95 // function, it is simpler to call upb_EpsCopyInputStream_IsDone() if possible.
upb_EpsCopyInputStream_IsDoneStatus(upb_EpsCopyInputStream * e,const char * ptr,int * overrun)96 UPB_INLINE upb_IsDoneStatus upb_EpsCopyInputStream_IsDoneStatus(
97 upb_EpsCopyInputStream* e, const char* ptr, int* overrun) {
98 *overrun = ptr - e->end;
99 if (UPB_LIKELY(ptr < e->limit_ptr)) {
100 return kUpb_IsDoneStatus_NotDone;
101 } else if (UPB_LIKELY(*overrun == e->limit)) {
102 return kUpb_IsDoneStatus_Done;
103 } else {
104 return kUpb_IsDoneStatus_NeedFallback;
105 }
106 }
107
108 // Returns true if the stream has hit a limit, either the current delimited
109 // limit or the overall end-of-stream. As a side effect, this function may flip
110 // the pointer to a new buffer if there are less than
111 // kUpb_EpsCopyInputStream_SlopBytes of data to be read in the current buffer.
112 //
113 // Postcondition: if the function returns false, there are at least
114 // kUpb_EpsCopyInputStream_SlopBytes of data available to read at *ptr.
upb_EpsCopyInputStream_IsDoneWithCallback(upb_EpsCopyInputStream * e,const char ** ptr,upb_EpsCopyInputStream_IsDoneFallbackFunc * func)115 UPB_INLINE bool upb_EpsCopyInputStream_IsDoneWithCallback(
116 upb_EpsCopyInputStream* e, const char** ptr,
117 upb_EpsCopyInputStream_IsDoneFallbackFunc* func) {
118 int overrun;
119 switch (upb_EpsCopyInputStream_IsDoneStatus(e, *ptr, &overrun)) {
120 case kUpb_IsDoneStatus_Done:
121 return true;
122 case kUpb_IsDoneStatus_NotDone:
123 return false;
124 case kUpb_IsDoneStatus_NeedFallback:
125 *ptr = func(e, *ptr, overrun);
126 return *ptr == NULL;
127 }
128 UPB_UNREACHABLE();
129 }
130
// Out-of-line fallback that upb_EpsCopyInputStream_IsDone() passes to
// upb_EpsCopyInputStream_IsDoneWithCallback(). Declared here; the definition
// is not part of this header.
const char* _upb_EpsCopyInputStream_IsDoneFallbackNoCallback(
    upb_EpsCopyInputStream* e, const char* ptr, int overrun);
133
// A simpler version of IsDoneWithCallback() that does not support a buffer flip
// callback. Useful in cases where we do not need to insert custom logic at
// every buffer flip.
//
// `*ptr` may be updated as a side effect, exactly as in IsDoneWithCallback().
//
// If this returns true, the user must call upb_EpsCopyInputStream_IsError()
// to distinguish between EOF and error.
UPB_INLINE bool upb_EpsCopyInputStream_IsDone(upb_EpsCopyInputStream* e,
                                              const char** ptr) {
  return upb_EpsCopyInputStream_IsDoneWithCallback(
      e, ptr, _upb_EpsCopyInputStream_IsDoneFallbackNoCallback);
}
145
// Returns the total number of bytes that are safe to read from the current
// buffer without reading uninitialized or unallocated memory.
//
// Note that this check does not respect any semantic limits on the stream,
// either limits from PushLimit() or the overall stream end, so some of these
// bytes may have unpredictable, nonsense values in them. The guarantee is only
// that the bytes are valid to read from the perspective of the C language
// (ie. you can read without triggering UBSAN or ASAN).
UPB_INLINE size_t upb_EpsCopyInputStream_BytesAvailable(
    upb_EpsCopyInputStream* e, const char* ptr) {
  // Distance to `end`, plus the slop region that is readable past `end`.
  return (e->end - ptr) + kUpb_EpsCopyInputStream_SlopBytes;
}
158
// Returns true if the given delimited field size is valid (it does not extend
// beyond any previously-pushed limits). `ptr` should point to the beginning
// of the field data, after the delimited size. `size` must be non-negative
// (asserted).
//
// Note that this does *not* guarantee that all of the data for this field is in
// the current buffer.
UPB_INLINE bool upb_EpsCopyInputStream_CheckSize(
    const upb_EpsCopyInputStream* e, const char* ptr, int size) {
  UPB_ASSERT(size >= 0);
  // `limit` is expressed relative to `end`, so express the end of the field
  // the same way before comparing.
  return ptr - e->end + size <= e->limit;
}
170
_upb_EpsCopyInputStream_CheckSizeAvailable(upb_EpsCopyInputStream * e,const char * ptr,int size,bool submessage)171 UPB_INLINE bool _upb_EpsCopyInputStream_CheckSizeAvailable(
172 upb_EpsCopyInputStream* e, const char* ptr, int size, bool submessage) {
173 // This is one extra branch compared to the more normal:
174 // return (size_t)(end - ptr) < size;
175 // However it is one less computation if we are just about to use "ptr + len":
176 // https://godbolt.org/z/35YGPz
177 // In microbenchmarks this shows a small improvement.
178 uintptr_t uptr = (uintptr_t)ptr;
179 uintptr_t uend = (uintptr_t)e->limit_ptr;
180 uintptr_t res = uptr + (size_t)size;
181 if (!submessage) uend += kUpb_EpsCopyInputStream_SlopBytes;
182 // NOTE: this check depends on having a linear address space. This is not
183 // technically guaranteed by uintptr_t.
184 bool ret = res >= uptr && res <= uend;
185 if (size < 0) UPB_ASSERT(!ret);
186 return ret;
187 }
188
// Returns true if the given delimited field size is valid (it does not extend
// beyond any previously-pushed limits) *and* all of the data for this field is
// available to be read in the current buffer.
//
// If the size is negative, this function will always return false. This
// property can be useful in some cases.
UPB_INLINE bool upb_EpsCopyInputStream_CheckDataSizeAvailable(
    upb_EpsCopyInputStream* e, const char* ptr, int size) {
  // submessage=false: the data may extend into the slop region.
  return _upb_EpsCopyInputStream_CheckSizeAvailable(e, ptr, size, false);
}
199
// Returns true if the given sub-message size is valid (it does not extend
// beyond any previously-pushed limits) *and* all of the data for this
// sub-message is available to be parsed in the current buffer.
//
// This implies that all fields from the sub-message can be parsed from the
// current buffer while maintaining the invariant that we always have at least
// kUpb_EpsCopyInputStream_SlopBytes of data available past the beginning of
// any individual field start.
//
// If the size is negative, this function will always return false. This
// property can be useful in some cases.
UPB_INLINE bool upb_EpsCopyInputStream_CheckSubMessageSizeAvailable(
    upb_EpsCopyInputStream* e, const char* ptr, int size) {
  // submessage=true: the sub-message must end at or before limit_ptr, without
  // using the slop region.
  return _upb_EpsCopyInputStream_CheckSizeAvailable(e, ptr, size, true);
}
215
// Returns true if enable_aliasing=true was passed to
// upb_EpsCopyInputStream_Init() when this stream was initialized.
UPB_INLINE bool upb_EpsCopyInputStream_AliasingEnabled(
    upb_EpsCopyInputStream* e) {
  return e->aliasing != kUpb_EpsCopyInputStream_NoAliasing;
}
222
223 // Returns true if aliasing_enabled=true was passed to
224 // upb_EpsCopyInputStream_Init() when this stream was initialized *and* we can
225 // alias into the region [ptr, size] in an input buffer.
upb_EpsCopyInputStream_AliasingAvailable(upb_EpsCopyInputStream * e,const char * ptr,size_t size)226 UPB_INLINE bool upb_EpsCopyInputStream_AliasingAvailable(
227 upb_EpsCopyInputStream* e, const char* ptr, size_t size) {
228 // When EpsCopyInputStream supports streaming, this will need to become a
229 // runtime check.
230 return upb_EpsCopyInputStream_CheckDataSizeAvailable(e, ptr, size) &&
231 e->aliasing >= kUpb_EpsCopyInputStream_NoDelta;
232 }
233
234 // Returns a pointer into an input buffer that corresponds to the parsing
235 // pointer `ptr`. The returned pointer may be the same as `ptr`, but also may
236 // be different if we are currently parsing out of the patch buffer.
237 //
238 // REQUIRES: Aliasing must be available for the given pointer. If the input is a
239 // flat buffer and aliasing is enabled, then aliasing will always be available.
upb_EpsCopyInputStream_GetAliasedPtr(upb_EpsCopyInputStream * e,const char * ptr)240 UPB_INLINE const char* upb_EpsCopyInputStream_GetAliasedPtr(
241 upb_EpsCopyInputStream* e, const char* ptr) {
242 UPB_ASSUME(upb_EpsCopyInputStream_AliasingAvailable(e, ptr, 0));
243 uintptr_t delta =
244 e->aliasing == kUpb_EpsCopyInputStream_NoDelta ? 0 : e->aliasing;
245 return (const char*)((uintptr_t)ptr + delta);
246 }
247
248 // Reads string data from the input, aliasing into the input buffer instead of
249 // copying. The parsing pointer is passed in `*ptr`, and will be updated if
250 // necessary to point to the actual input buffer. Returns the new parsing
251 // pointer, which will be advanced past the string data.
252 //
253 // REQUIRES: Aliasing must be available for this data region (test with
254 // upb_EpsCopyInputStream_AliasingAvailable().
upb_EpsCopyInputStream_ReadStringAliased(upb_EpsCopyInputStream * e,const char ** ptr,size_t size)255 UPB_INLINE const char* upb_EpsCopyInputStream_ReadStringAliased(
256 upb_EpsCopyInputStream* e, const char** ptr, size_t size) {
257 UPB_ASSUME(upb_EpsCopyInputStream_AliasingAvailable(e, *ptr, size));
258 const char* ret = *ptr + size;
259 *ptr = upb_EpsCopyInputStream_GetAliasedPtr(e, *ptr);
260 UPB_ASSUME(ret != NULL);
261 return ret;
262 }
263
264 // Skips `size` bytes of data from the input and returns a pointer past the end.
265 // Returns NULL on end of stream or error.
upb_EpsCopyInputStream_Skip(upb_EpsCopyInputStream * e,const char * ptr,int size)266 UPB_INLINE const char* upb_EpsCopyInputStream_Skip(upb_EpsCopyInputStream* e,
267 const char* ptr, int size) {
268 if (!upb_EpsCopyInputStream_CheckDataSizeAvailable(e, ptr, size)) return NULL;
269 return ptr + size;
270 }
271
272 // Copies `size` bytes of data from the input `ptr` into the buffer `to`, and
273 // returns a pointer past the end. Returns NULL on end of stream or error.
upb_EpsCopyInputStream_Copy(upb_EpsCopyInputStream * e,const char * ptr,void * to,int size)274 UPB_INLINE const char* upb_EpsCopyInputStream_Copy(upb_EpsCopyInputStream* e,
275 const char* ptr, void* to,
276 int size) {
277 if (!upb_EpsCopyInputStream_CheckDataSizeAvailable(e, ptr, size)) return NULL;
278 memcpy(to, ptr, size);
279 return ptr + size;
280 }
281
282 // Reads string data from the stream and advances the pointer accordingly.
283 // If aliasing was enabled when the stream was initialized, then the returned
284 // pointer will point into the input buffer if possible, otherwise new data
285 // will be allocated from arena and copied into. We may be forced to copy even
286 // if aliasing was enabled if the input data spans input buffers.
287 //
288 // Returns NULL if memory allocation failed, or we reached a premature EOF.
upb_EpsCopyInputStream_ReadString(upb_EpsCopyInputStream * e,const char ** ptr,size_t size,upb_Arena * arena)289 UPB_INLINE const char* upb_EpsCopyInputStream_ReadString(
290 upb_EpsCopyInputStream* e, const char** ptr, size_t size,
291 upb_Arena* arena) {
292 if (upb_EpsCopyInputStream_AliasingAvailable(e, *ptr, size)) {
293 return upb_EpsCopyInputStream_ReadStringAliased(e, ptr, size);
294 } else {
295 // We need to allocate and copy.
296 if (!upb_EpsCopyInputStream_CheckDataSizeAvailable(e, *ptr, size)) {
297 return NULL;
298 }
299 UPB_ASSERT(arena);
300 char* data = (char*)upb_Arena_Malloc(arena, size);
301 if (!data) return NULL;
302 const char* ret = upb_EpsCopyInputStream_Copy(e, *ptr, data, size);
303 *ptr = data;
304 return ret;
305 }
306 }
307
// Debug check of the stream invariant that limit_ptr equals `end` when the
// limit is non-negative, and `end + limit` when it is negative.
UPB_INLINE void _upb_EpsCopyInputStream_CheckLimit(upb_EpsCopyInputStream* e) {
  UPB_ASSERT(e->limit_ptr == e->end + UPB_MIN(0, e->limit));
}
311
312 // Pushes a limit onto the stack of limits for the current stream. The limit
313 // will extend for `size` bytes beyond the position in `ptr`. Future calls to
314 // upb_EpsCopyInputStream_IsDone() will return `true` when the stream position
315 // reaches this limit.
316 //
317 // Returns a delta that the caller must store and supply to PopLimit() below.
upb_EpsCopyInputStream_PushLimit(upb_EpsCopyInputStream * e,const char * ptr,int size)318 UPB_INLINE int upb_EpsCopyInputStream_PushLimit(upb_EpsCopyInputStream* e,
319 const char* ptr, int size) {
320 int limit = size + (int)(ptr - e->end);
321 int delta = e->limit - limit;
322 _upb_EpsCopyInputStream_CheckLimit(e);
323 UPB_ASSERT(limit <= e->limit);
324 e->limit = limit;
325 e->limit_ptr = e->end + UPB_MIN(0, limit);
326 _upb_EpsCopyInputStream_CheckLimit(e);
327 return delta;
328 }
329
330 // Pops the last limit that was pushed on this stream. This may only be called
331 // once IsDone() returns true. The user must pass the delta that was returned
332 // from PushLimit().
upb_EpsCopyInputStream_PopLimit(upb_EpsCopyInputStream * e,const char * ptr,int saved_delta)333 UPB_INLINE void upb_EpsCopyInputStream_PopLimit(upb_EpsCopyInputStream* e,
334 const char* ptr,
335 int saved_delta) {
336 UPB_ASSERT(ptr - e->end == e->limit);
337 _upb_EpsCopyInputStream_CheckLimit(e);
338 e->limit += saved_delta;
339 e->limit_ptr = e->end + UPB_MIN(0, e->limit);
340 _upb_EpsCopyInputStream_CheckLimit(e);
341 }
342
// Inline body of the IsDone() slow path. `ptr` is the current parsing pointer
// and `overrun` is its offset from e->end.
//
// If `overrun` is below the current limit there is still data to parse: the
// kUpb_EpsCopyInputStream_SlopBytes bytes starting at e->end are copied into
// the patch buffer, the stream is re-pointed at the patch buffer, and
// `callback(e, old_position, new_position)` supplies the return value.
//
// Otherwise the position is beyond the limit: the stream is put into the error
// state and `callback(e, NULL, NULL)` supplies the return value.
UPB_INLINE const char* _upb_EpsCopyInputStream_IsDoneFallbackInline(
    upb_EpsCopyInputStream* e, const char* ptr, int overrun,
    upb_EpsCopyInputStream_BufferFlipCallback* callback) {
  if (overrun < e->limit) {
    // Need to copy remaining data into patch buffer.
    UPB_ASSERT(overrun < kUpb_EpsCopyInputStream_SlopBytes);
    // Despite the name, this is the current parsing position in the old
    // buffer (old end + overrun), not the old `end` itself.
    const char* old_end = ptr;
    // patch[0] will mirror the byte at the old `end`, so the same logical
    // position in the patch buffer is `overrun` bytes in.
    const char* new_start = &e->patch[0] + overrun;
    // Zero the second half so the new slop region has defined contents.
    memset(e->patch + kUpb_EpsCopyInputStream_SlopBytes, 0,
           kUpb_EpsCopyInputStream_SlopBytes);
    // Must read from the old e->end before e->end is reassigned below.
    memcpy(e->patch, e->end, kUpb_EpsCopyInputStream_SlopBytes);
    ptr = new_start;
    // The new `end` is SlopBytes further along the logical stream than the
    // old one, so the end-relative limit shrinks by the same amount.
    e->end = &e->patch[kUpb_EpsCopyInputStream_SlopBytes];
    e->limit -= kUpb_EpsCopyInputStream_SlopBytes;
    // NOTE(review): no UPB_MIN(0, ...) clamp here, unlike PushLimit/PopLimit;
    // presumably limit <= 0 at this point because Init() never sets a limit
    // above SlopBytes -- confirm.
    e->limit_ptr = e->end + e->limit;
    UPB_ASSERT(ptr < e->limit_ptr);
    if (e->aliasing != kUpb_EpsCopyInputStream_NoAliasing) {
      // Offset that maps a patch-buffer pointer back to the old buffer.
      e->aliasing = (uintptr_t)old_end - (uintptr_t)new_start;
    }
    return callback(e, old_end, new_start);
  } else {
    // overrun == limit is reported as Done by IsDoneStatus(), so reaching
    // this branch means the position is strictly past the limit.
    UPB_ASSERT(overrun > e->limit);
    e->error = true;
    return callback(e, NULL, NULL);
  }
}
369
// Parse function invoked by upb_EpsCopyInputStream_TryParseDelimitedFast() for
// the body of a delimited sub-message. It receives the stream, the pointer to
// the start of the sub-message, and the caller-supplied `ctx`; its return value
// becomes the caller's new parsing pointer.
typedef const char* upb_EpsCopyInputStream_ParseDelimitedFunc(
    upb_EpsCopyInputStream* e, const char* ptr, void* ctx);
372
373 // Tries to perform a fast-path handling of the given delimited message data.
374 // If the sub-message beginning at `*ptr` and extending for `len` is short and
375 // fits within this buffer, calls `func` with `ctx` as a parameter, where the
376 // pushing and popping of limits is handled automatically and with lower cost
377 // than the normal PushLimit()/PopLimit() sequence.
upb_EpsCopyInputStream_TryParseDelimitedFast(upb_EpsCopyInputStream * e,const char ** ptr,int len,upb_EpsCopyInputStream_ParseDelimitedFunc * func,void * ctx)378 UPB_FORCEINLINE bool upb_EpsCopyInputStream_TryParseDelimitedFast(
379 upb_EpsCopyInputStream* e, const char** ptr, int len,
380 upb_EpsCopyInputStream_ParseDelimitedFunc* func, void* ctx) {
381 if (!upb_EpsCopyInputStream_CheckSubMessageSizeAvailable(e, *ptr, len)) {
382 return false;
383 }
384
385 // Fast case: Sub-message is <128 bytes and fits in the current buffer.
386 // This means we can preserve limit/limit_ptr verbatim.
387 const char* saved_limit_ptr = e->limit_ptr;
388 int saved_limit = e->limit;
389 e->limit_ptr = *ptr + len;
390 e->limit = e->limit_ptr - e->end;
391 UPB_ASSERT(e->limit_ptr == e->end + UPB_MIN(0, e->limit));
392 *ptr = func(e, *ptr, ctx);
393 e->limit_ptr = saved_limit_ptr;
394 e->limit = saved_limit;
395 UPB_ASSERT(e->limit_ptr == e->end + UPB_MIN(0, e->limit));
396 return true;
397 }
398
399 #ifdef __cplusplus
400 } /* extern "C" */
401 #endif
402
403 #include "upb/port/undef.inc"
404
405 #endif // UPB_WIRE_EPS_COPY_INPUT_STREAM_H_
406