• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Protocol Buffers - Google's data interchange format
2 // Copyright 2023 Google LLC.  All rights reserved.
3 //
4 // Use of this source code is governed by a BSD-style
5 // license that can be found in the LICENSE file or at
6 // https://developers.google.com/open-source/licenses/bsd
7 
8 #ifndef UPB_WIRE_EPS_COPY_INPUT_STREAM_H_
9 #define UPB_WIRE_EPS_COPY_INPUT_STREAM_H_
10 
11 #include <string.h>
12 
13 #include "upb/mem/arena.h"
14 
15 // Must be last.
16 #include "upb/port/def.inc"
17 
18 #ifdef __cplusplus
19 extern "C" {
20 #endif
21 
// Upper bound, in bytes, on the wire-format size of a single protobuf field.
// Bounds checks are meant to happen once per field: once
// upb_EpsCopyInputStream_IsDone() has been called, the decoder may read this
// many bytes with no further check.  The stream copies into a patch buffer
// whenever that is needed to uphold this guarantee.
#define kUpb_EpsCopyInputStream_SlopBytes 16
29 
// Reserved values of upb_EpsCopyInputStream.aliasing.
enum {
  // Aliasing was not requested when the stream was initialized.
  kUpb_EpsCopyInputStream_NoAliasing = 0,

  // Not referenced by any function in this header.
  kUpb_EpsCopyInputStream_OnPatch = 1,

  // Aliasing is enabled and parse pointers already point at the input buffer
  // (GetAliasedPtr() applies a delta of zero).
  kUpb_EpsCopyInputStream_NoDelta = 2
};
35 
36 typedef struct {
37   const char* end;        // Can read up to SlopBytes bytes beyond this.
38   const char* limit_ptr;  // For bounds checks, = end + UPB_MIN(limit, 0)
39   uintptr_t aliasing;
40   int limit;   // Submessage limit relative to end
41   bool error;  // To distinguish between EOF and error.
42   char patch[kUpb_EpsCopyInputStream_SlopBytes * 2];
43 } upb_EpsCopyInputStream;
44 
45 // Returns true if the stream is in the error state. A stream enters the error
46 // state when the user reads past a limit (caught in IsDone()) or the
47 // ZeroCopyInputStream returns an error.
upb_EpsCopyInputStream_IsError(upb_EpsCopyInputStream * e)48 UPB_INLINE bool upb_EpsCopyInputStream_IsError(upb_EpsCopyInputStream* e) {
49   return e->error;
50 }
51 
52 typedef const char* upb_EpsCopyInputStream_BufferFlipCallback(
53     upb_EpsCopyInputStream* e, const char* old_end, const char* new_start);
54 
55 typedef const char* upb_EpsCopyInputStream_IsDoneFallbackFunc(
56     upb_EpsCopyInputStream* e, const char* ptr, int overrun);
57 
58 // Initializes a upb_EpsCopyInputStream using the contents of the buffer
59 // [*ptr, size].  Updates `*ptr` as necessary to guarantee that at least
60 // kUpb_EpsCopyInputStream_SlopBytes are available to read.
upb_EpsCopyInputStream_Init(upb_EpsCopyInputStream * e,const char ** ptr,size_t size,bool enable_aliasing)61 UPB_INLINE void upb_EpsCopyInputStream_Init(upb_EpsCopyInputStream* e,
62                                             const char** ptr, size_t size,
63                                             bool enable_aliasing) {
64   if (size <= kUpb_EpsCopyInputStream_SlopBytes) {
65     memset(&e->patch, 0, 32);
66     if (size) memcpy(&e->patch, *ptr, size);
67     e->aliasing = enable_aliasing ? (uintptr_t)*ptr - (uintptr_t)e->patch
68                                   : kUpb_EpsCopyInputStream_NoAliasing;
69     *ptr = e->patch;
70     e->end = *ptr + size;
71     e->limit = 0;
72   } else {
73     e->end = *ptr + size - kUpb_EpsCopyInputStream_SlopBytes;
74     e->limit = kUpb_EpsCopyInputStream_SlopBytes;
75     e->aliasing = enable_aliasing ? kUpb_EpsCopyInputStream_NoDelta
76                                   : kUpb_EpsCopyInputStream_NoAliasing;
77   }
78   e->limit_ptr = e->end;
79   e->error = false;
80 }
81 
// Result of upb_EpsCopyInputStream_IsDoneStatus().
typedef enum {
  // The current stream position is exactly at a limit.
  kUpb_IsDoneStatus_Done = 0,

  // The current stream position is before the limit.
  kUpb_IsDoneStatus_NotDone = 1,

  // The current stream position is not at a limit, and the stream needs to
  // be flipped to a new buffer before more data can be read.
  kUpb_IsDoneStatus_NeedFallback = 2
} upb_IsDoneStatus;
93 
94 // Returns the status of the current stream position.  This is a low-level
95 // function, it is simpler to call upb_EpsCopyInputStream_IsDone() if possible.
upb_EpsCopyInputStream_IsDoneStatus(upb_EpsCopyInputStream * e,const char * ptr,int * overrun)96 UPB_INLINE upb_IsDoneStatus upb_EpsCopyInputStream_IsDoneStatus(
97     upb_EpsCopyInputStream* e, const char* ptr, int* overrun) {
98   *overrun = ptr - e->end;
99   if (UPB_LIKELY(ptr < e->limit_ptr)) {
100     return kUpb_IsDoneStatus_NotDone;
101   } else if (UPB_LIKELY(*overrun == e->limit)) {
102     return kUpb_IsDoneStatus_Done;
103   } else {
104     return kUpb_IsDoneStatus_NeedFallback;
105   }
106 }
107 
108 // Returns true if the stream has hit a limit, either the current delimited
109 // limit or the overall end-of-stream. As a side effect, this function may flip
110 // the pointer to a new buffer if there are less than
111 // kUpb_EpsCopyInputStream_SlopBytes of data to be read in the current buffer.
112 //
113 // Postcondition: if the function returns false, there are at least
114 // kUpb_EpsCopyInputStream_SlopBytes of data available to read at *ptr.
upb_EpsCopyInputStream_IsDoneWithCallback(upb_EpsCopyInputStream * e,const char ** ptr,upb_EpsCopyInputStream_IsDoneFallbackFunc * func)115 UPB_INLINE bool upb_EpsCopyInputStream_IsDoneWithCallback(
116     upb_EpsCopyInputStream* e, const char** ptr,
117     upb_EpsCopyInputStream_IsDoneFallbackFunc* func) {
118   int overrun;
119   switch (upb_EpsCopyInputStream_IsDoneStatus(e, *ptr, &overrun)) {
120     case kUpb_IsDoneStatus_Done:
121       return true;
122     case kUpb_IsDoneStatus_NotDone:
123       return false;
124     case kUpb_IsDoneStatus_NeedFallback:
125       *ptr = func(e, *ptr, overrun);
126       return *ptr == NULL;
127   }
128   UPB_UNREACHABLE();
129 }
130 
131 const char* _upb_EpsCopyInputStream_IsDoneFallbackNoCallback(
132     upb_EpsCopyInputStream* e, const char* ptr, int overrun);
133 
134 // A simpler version of IsDoneWithCallback() that does not support a buffer flip
135 // callback. Useful in cases where we do not need to insert custom logic at
136 // every buffer flip.
137 //
138 // If this returns true, the user must call upb_EpsCopyInputStream_IsError()
139 // to distinguish between EOF and error.
upb_EpsCopyInputStream_IsDone(upb_EpsCopyInputStream * e,const char ** ptr)140 UPB_INLINE bool upb_EpsCopyInputStream_IsDone(upb_EpsCopyInputStream* e,
141                                               const char** ptr) {
142   return upb_EpsCopyInputStream_IsDoneWithCallback(
143       e, ptr, _upb_EpsCopyInputStream_IsDoneFallbackNoCallback);
144 }
145 
146 // Returns the total number of bytes that are safe to read from the current
147 // buffer without reading uninitialized or unallocated memory.
148 //
149 // Note that this check does not respect any semantic limits on the stream,
150 // either limits from PushLimit() or the overall stream end, so some of these
151 // bytes may have unpredictable, nonsense values in them. The guarantee is only
152 // that the bytes are valid to read from the perspective of the C language
153 // (ie. you can read without triggering UBSAN or ASAN).
upb_EpsCopyInputStream_BytesAvailable(upb_EpsCopyInputStream * e,const char * ptr)154 UPB_INLINE size_t upb_EpsCopyInputStream_BytesAvailable(
155     upb_EpsCopyInputStream* e, const char* ptr) {
156   return (e->end - ptr) + kUpb_EpsCopyInputStream_SlopBytes;
157 }
158 
159 // Returns true if the given delimited field size is valid (it does not extend
160 // beyond any previously-pushed limits).  `ptr` should point to the beginning
161 // of the field data, after the delimited size.
162 //
163 // Note that this does *not* guarantee that all of the data for this field is in
164 // the current buffer.
upb_EpsCopyInputStream_CheckSize(const upb_EpsCopyInputStream * e,const char * ptr,int size)165 UPB_INLINE bool upb_EpsCopyInputStream_CheckSize(
166     const upb_EpsCopyInputStream* e, const char* ptr, int size) {
167   UPB_ASSERT(size >= 0);
168   return ptr - e->end + size <= e->limit;
169 }
170 
_upb_EpsCopyInputStream_CheckSizeAvailable(upb_EpsCopyInputStream * e,const char * ptr,int size,bool submessage)171 UPB_INLINE bool _upb_EpsCopyInputStream_CheckSizeAvailable(
172     upb_EpsCopyInputStream* e, const char* ptr, int size, bool submessage) {
173   // This is one extra branch compared to the more normal:
174   //   return (size_t)(end - ptr) < size;
175   // However it is one less computation if we are just about to use "ptr + len":
176   //   https://godbolt.org/z/35YGPz
177   // In microbenchmarks this shows a small improvement.
178   uintptr_t uptr = (uintptr_t)ptr;
179   uintptr_t uend = (uintptr_t)e->limit_ptr;
180   uintptr_t res = uptr + (size_t)size;
181   if (!submessage) uend += kUpb_EpsCopyInputStream_SlopBytes;
182   // NOTE: this check depends on having a linear address space.  This is not
183   // technically guaranteed by uintptr_t.
184   bool ret = res >= uptr && res <= uend;
185   if (size < 0) UPB_ASSERT(!ret);
186   return ret;
187 }
188 
189 // Returns true if the given delimited field size is valid (it does not extend
190 // beyond any previously-pushed limited) *and* all of the data for this field is
191 // available to be read in the current buffer.
192 //
193 // If the size is negative, this function will always return false. This
194 // property can be useful in some cases.
upb_EpsCopyInputStream_CheckDataSizeAvailable(upb_EpsCopyInputStream * e,const char * ptr,int size)195 UPB_INLINE bool upb_EpsCopyInputStream_CheckDataSizeAvailable(
196     upb_EpsCopyInputStream* e, const char* ptr, int size) {
197   return _upb_EpsCopyInputStream_CheckSizeAvailable(e, ptr, size, false);
198 }
199 
200 // Returns true if the given sub-message size is valid (it does not extend
201 // beyond any previously-pushed limited) *and* all of the data for this
202 // sub-message is available to be parsed in the current buffer.
203 //
204 // This implies that all fields from the sub-message can be parsed from the
205 // current buffer while maintaining the invariant that we always have at least
206 // kUpb_EpsCopyInputStream_SlopBytes of data available past the beginning of
207 // any individual field start.
208 //
209 // If the size is negative, this function will always return false. This
210 // property can be useful in some cases.
upb_EpsCopyInputStream_CheckSubMessageSizeAvailable(upb_EpsCopyInputStream * e,const char * ptr,int size)211 UPB_INLINE bool upb_EpsCopyInputStream_CheckSubMessageSizeAvailable(
212     upb_EpsCopyInputStream* e, const char* ptr, int size) {
213   return _upb_EpsCopyInputStream_CheckSizeAvailable(e, ptr, size, true);
214 }
215 
216 // Returns true if aliasing_enabled=true was passed to
217 // upb_EpsCopyInputStream_Init() when this stream was initialized.
upb_EpsCopyInputStream_AliasingEnabled(upb_EpsCopyInputStream * e)218 UPB_INLINE bool upb_EpsCopyInputStream_AliasingEnabled(
219     upb_EpsCopyInputStream* e) {
220   return e->aliasing != kUpb_EpsCopyInputStream_NoAliasing;
221 }
222 
223 // Returns true if aliasing_enabled=true was passed to
224 // upb_EpsCopyInputStream_Init() when this stream was initialized *and* we can
225 // alias into the region [ptr, size] in an input buffer.
upb_EpsCopyInputStream_AliasingAvailable(upb_EpsCopyInputStream * e,const char * ptr,size_t size)226 UPB_INLINE bool upb_EpsCopyInputStream_AliasingAvailable(
227     upb_EpsCopyInputStream* e, const char* ptr, size_t size) {
228   // When EpsCopyInputStream supports streaming, this will need to become a
229   // runtime check.
230   return upb_EpsCopyInputStream_CheckDataSizeAvailable(e, ptr, size) &&
231          e->aliasing >= kUpb_EpsCopyInputStream_NoDelta;
232 }
233 
234 // Returns a pointer into an input buffer that corresponds to the parsing
235 // pointer `ptr`.  The returned pointer may be the same as `ptr`, but also may
236 // be different if we are currently parsing out of the patch buffer.
237 //
238 // REQUIRES: Aliasing must be available for the given pointer. If the input is a
239 // flat buffer and aliasing is enabled, then aliasing will always be available.
upb_EpsCopyInputStream_GetAliasedPtr(upb_EpsCopyInputStream * e,const char * ptr)240 UPB_INLINE const char* upb_EpsCopyInputStream_GetAliasedPtr(
241     upb_EpsCopyInputStream* e, const char* ptr) {
242   UPB_ASSUME(upb_EpsCopyInputStream_AliasingAvailable(e, ptr, 0));
243   uintptr_t delta =
244       e->aliasing == kUpb_EpsCopyInputStream_NoDelta ? 0 : e->aliasing;
245   return (const char*)((uintptr_t)ptr + delta);
246 }
247 
248 // Reads string data from the input, aliasing into the input buffer instead of
249 // copying. The parsing pointer is passed in `*ptr`, and will be updated if
250 // necessary to point to the actual input buffer. Returns the new parsing
251 // pointer, which will be advanced past the string data.
252 //
253 // REQUIRES: Aliasing must be available for this data region (test with
254 // upb_EpsCopyInputStream_AliasingAvailable().
upb_EpsCopyInputStream_ReadStringAliased(upb_EpsCopyInputStream * e,const char ** ptr,size_t size)255 UPB_INLINE const char* upb_EpsCopyInputStream_ReadStringAliased(
256     upb_EpsCopyInputStream* e, const char** ptr, size_t size) {
257   UPB_ASSUME(upb_EpsCopyInputStream_AliasingAvailable(e, *ptr, size));
258   const char* ret = *ptr + size;
259   *ptr = upb_EpsCopyInputStream_GetAliasedPtr(e, *ptr);
260   UPB_ASSUME(ret != NULL);
261   return ret;
262 }
263 
264 // Skips `size` bytes of data from the input and returns a pointer past the end.
265 // Returns NULL on end of stream or error.
upb_EpsCopyInputStream_Skip(upb_EpsCopyInputStream * e,const char * ptr,int size)266 UPB_INLINE const char* upb_EpsCopyInputStream_Skip(upb_EpsCopyInputStream* e,
267                                                    const char* ptr, int size) {
268   if (!upb_EpsCopyInputStream_CheckDataSizeAvailable(e, ptr, size)) return NULL;
269   return ptr + size;
270 }
271 
272 // Copies `size` bytes of data from the input `ptr` into the buffer `to`, and
273 // returns a pointer past the end. Returns NULL on end of stream or error.
upb_EpsCopyInputStream_Copy(upb_EpsCopyInputStream * e,const char * ptr,void * to,int size)274 UPB_INLINE const char* upb_EpsCopyInputStream_Copy(upb_EpsCopyInputStream* e,
275                                                    const char* ptr, void* to,
276                                                    int size) {
277   if (!upb_EpsCopyInputStream_CheckDataSizeAvailable(e, ptr, size)) return NULL;
278   memcpy(to, ptr, size);
279   return ptr + size;
280 }
281 
282 // Reads string data from the stream and advances the pointer accordingly.
283 // If aliasing was enabled when the stream was initialized, then the returned
284 // pointer will point into the input buffer if possible, otherwise new data
285 // will be allocated from arena and copied into. We may be forced to copy even
286 // if aliasing was enabled if the input data spans input buffers.
287 //
288 // Returns NULL if memory allocation failed, or we reached a premature EOF.
upb_EpsCopyInputStream_ReadString(upb_EpsCopyInputStream * e,const char ** ptr,size_t size,upb_Arena * arena)289 UPB_INLINE const char* upb_EpsCopyInputStream_ReadString(
290     upb_EpsCopyInputStream* e, const char** ptr, size_t size,
291     upb_Arena* arena) {
292   if (upb_EpsCopyInputStream_AliasingAvailable(e, *ptr, size)) {
293     return upb_EpsCopyInputStream_ReadStringAliased(e, ptr, size);
294   } else {
295     // We need to allocate and copy.
296     if (!upb_EpsCopyInputStream_CheckDataSizeAvailable(e, *ptr, size)) {
297       return NULL;
298     }
299     UPB_ASSERT(arena);
300     char* data = (char*)upb_Arena_Malloc(arena, size);
301     if (!data) return NULL;
302     const char* ret = upb_EpsCopyInputStream_Copy(e, *ptr, data, size);
303     *ptr = data;
304     return ret;
305   }
306 }
307 
_upb_EpsCopyInputStream_CheckLimit(upb_EpsCopyInputStream * e)308 UPB_INLINE void _upb_EpsCopyInputStream_CheckLimit(upb_EpsCopyInputStream* e) {
309   UPB_ASSERT(e->limit_ptr == e->end + UPB_MIN(0, e->limit));
310 }
311 
312 // Pushes a limit onto the stack of limits for the current stream.  The limit
313 // will extend for `size` bytes beyond the position in `ptr`.  Future calls to
314 // upb_EpsCopyInputStream_IsDone() will return `true` when the stream position
315 // reaches this limit.
316 //
317 // Returns a delta that the caller must store and supply to PopLimit() below.
upb_EpsCopyInputStream_PushLimit(upb_EpsCopyInputStream * e,const char * ptr,int size)318 UPB_INLINE int upb_EpsCopyInputStream_PushLimit(upb_EpsCopyInputStream* e,
319                                                 const char* ptr, int size) {
320   int limit = size + (int)(ptr - e->end);
321   int delta = e->limit - limit;
322   _upb_EpsCopyInputStream_CheckLimit(e);
323   UPB_ASSERT(limit <= e->limit);
324   e->limit = limit;
325   e->limit_ptr = e->end + UPB_MIN(0, limit);
326   _upb_EpsCopyInputStream_CheckLimit(e);
327   return delta;
328 }
329 
330 // Pops the last limit that was pushed on this stream.  This may only be called
331 // once IsDone() returns true.  The user must pass the delta that was returned
332 // from PushLimit().
upb_EpsCopyInputStream_PopLimit(upb_EpsCopyInputStream * e,const char * ptr,int saved_delta)333 UPB_INLINE void upb_EpsCopyInputStream_PopLimit(upb_EpsCopyInputStream* e,
334                                                 const char* ptr,
335                                                 int saved_delta) {
336   UPB_ASSERT(ptr - e->end == e->limit);
337   _upb_EpsCopyInputStream_CheckLimit(e);
338   e->limit += saved_delta;
339   e->limit_ptr = e->end + UPB_MIN(0, e->limit);
340   _upb_EpsCopyInputStream_CheckLimit(e);
341 }
342 
_upb_EpsCopyInputStream_IsDoneFallbackInline(upb_EpsCopyInputStream * e,const char * ptr,int overrun,upb_EpsCopyInputStream_BufferFlipCallback * callback)343 UPB_INLINE const char* _upb_EpsCopyInputStream_IsDoneFallbackInline(
344     upb_EpsCopyInputStream* e, const char* ptr, int overrun,
345     upb_EpsCopyInputStream_BufferFlipCallback* callback) {
346   if (overrun < e->limit) {
347     // Need to copy remaining data into patch buffer.
348     UPB_ASSERT(overrun < kUpb_EpsCopyInputStream_SlopBytes);
349     const char* old_end = ptr;
350     const char* new_start = &e->patch[0] + overrun;
351     memset(e->patch + kUpb_EpsCopyInputStream_SlopBytes, 0,
352            kUpb_EpsCopyInputStream_SlopBytes);
353     memcpy(e->patch, e->end, kUpb_EpsCopyInputStream_SlopBytes);
354     ptr = new_start;
355     e->end = &e->patch[kUpb_EpsCopyInputStream_SlopBytes];
356     e->limit -= kUpb_EpsCopyInputStream_SlopBytes;
357     e->limit_ptr = e->end + e->limit;
358     UPB_ASSERT(ptr < e->limit_ptr);
359     if (e->aliasing != kUpb_EpsCopyInputStream_NoAliasing) {
360       e->aliasing = (uintptr_t)old_end - (uintptr_t)new_start;
361     }
362     return callback(e, old_end, new_start);
363   } else {
364     UPB_ASSERT(overrun > e->limit);
365     e->error = true;
366     return callback(e, NULL, NULL);
367   }
368 }
369 
370 typedef const char* upb_EpsCopyInputStream_ParseDelimitedFunc(
371     upb_EpsCopyInputStream* e, const char* ptr, void* ctx);
372 
373 // Tries to perform a fast-path handling of the given delimited message data.
374 // If the sub-message beginning at `*ptr` and extending for `len` is short and
375 // fits within this buffer, calls `func` with `ctx` as a parameter, where the
376 // pushing and popping of limits is handled automatically and with lower cost
377 // than the normal PushLimit()/PopLimit() sequence.
upb_EpsCopyInputStream_TryParseDelimitedFast(upb_EpsCopyInputStream * e,const char ** ptr,int len,upb_EpsCopyInputStream_ParseDelimitedFunc * func,void * ctx)378 UPB_FORCEINLINE bool upb_EpsCopyInputStream_TryParseDelimitedFast(
379     upb_EpsCopyInputStream* e, const char** ptr, int len,
380     upb_EpsCopyInputStream_ParseDelimitedFunc* func, void* ctx) {
381   if (!upb_EpsCopyInputStream_CheckSubMessageSizeAvailable(e, *ptr, len)) {
382     return false;
383   }
384 
385   // Fast case: Sub-message is <128 bytes and fits in the current buffer.
386   // This means we can preserve limit/limit_ptr verbatim.
387   const char* saved_limit_ptr = e->limit_ptr;
388   int saved_limit = e->limit;
389   e->limit_ptr = *ptr + len;
390   e->limit = e->limit_ptr - e->end;
391   UPB_ASSERT(e->limit_ptr == e->end + UPB_MIN(0, e->limit));
392   *ptr = func(e, *ptr, ctx);
393   e->limit_ptr = saved_limit_ptr;
394   e->limit = saved_limit;
395   UPB_ASSERT(e->limit_ptr == e->end + UPB_MIN(0, e->limit));
396   return true;
397 }
398 
399 #ifdef __cplusplus
400 } /* extern "C" */
401 #endif
402 
403 #include "upb/port/undef.inc"
404 
405 #endif  // UPB_WIRE_EPS_COPY_INPUT_STREAM_H_
406