// Protocol Buffers - Google's data interchange format
// Copyright 2023 Google LLC.  All rights reserved.
//
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file or at
// https://developers.google.com/open-source/licenses/bsd
7 
8 #ifndef UPB_LEX_UNICODE_H_
9 #define UPB_LEX_UNICODE_H_
10 
11 #include <stdint.h>
12 
13 // Must be last.
14 #include "upb/port/def.inc"
15 
16 #ifdef __cplusplus
17 extern "C" {
18 #endif
19 
20 // Returns true iff a codepoint is the value for a high surrogate.
upb_Unicode_IsHigh(uint32_t cp)21 UPB_INLINE bool upb_Unicode_IsHigh(uint32_t cp) {
22   return (cp >= 0xd800 && cp <= 0xdbff);
23 }
24 
25 // Returns true iff a codepoint is the value for a low surrogate.
upb_Unicode_IsLow(uint32_t cp)26 UPB_INLINE bool upb_Unicode_IsLow(uint32_t cp) {
27   return (cp >= 0xdc00 && cp <= 0xdfff);
28 }
29 
30 // Returns the high 16-bit surrogate value for a supplementary codepoint.
31 // Does not sanity-check the input.
upb_Unicode_ToHigh(uint32_t cp)32 UPB_INLINE uint16_t upb_Unicode_ToHigh(uint32_t cp) {
33   return (cp >> 10) + 0xd7c0;
34 }
35 
36 // Returns the low 16-bit surrogate value for a supplementary codepoint.
37 // Does not sanity-check the input.
upb_Unicode_ToLow(uint32_t cp)38 UPB_INLINE uint16_t upb_Unicode_ToLow(uint32_t cp) {
39   return (cp & 0x3ff) | 0xdc00;
40 }
41 
42 // Returns the 32-bit value corresponding to a pair of 16-bit surrogates.
43 // Does not sanity-check the input.
upb_Unicode_FromPair(uint32_t high,uint32_t low)44 UPB_INLINE uint32_t upb_Unicode_FromPair(uint32_t high, uint32_t low) {
45   return ((high & 0x3ff) << 10) + (low & 0x3ff) + 0x10000;
46 }
47 
// Outputs a codepoint as UTF8.
// Returns the number of bytes written (1-4 on success, 0 on error).
// Does not sanity-check the input. Specifically does not check for surrogates.
//
// Since up to 4 bytes may be written, `out` must have room for at least 4.
// NOTE(review): whether a trailing NUL is written is not visible from this
// declaration -- confirm against the implementation before relying on it.
int upb_Unicode_ToUTF8(uint32_t cp, char* out);
52 
53 #ifdef __cplusplus
54 } /* extern "C" */
55 #endif
56 
57 #include "upb/port/undef.inc"
58 
59 #endif /* UPB_LEX_UNICODE_H_ */
60