• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright 2024 The Abseil Authors
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //     https://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 #include "absl/debugging/internal/utf8_for_code_point.h"
16 
17 #include <cstdint>
18 
19 #include "absl/base/config.h"
20 
21 namespace absl {
22 ABSL_NAMESPACE_BEGIN
23 namespace debugging_internal {
24 namespace {
25 
// Code point limits used when choosing a UTF-8 encoding length.

// Inclusive range of UTF-16 surrogate values; the encoder rejects these.
constexpr uint32_t kMinSurrogate = 0xd800;
constexpr uint32_t kMaxSurrogate = 0xdfff;

// Largest code point encoded with one, two, and three bytes respectively.
constexpr uint32_t kMax1ByteCodePoint = 0x7f;
constexpr uint32_t kMax2ByteCodePoint = 0x7ff;
constexpr uint32_t kMax3ByteCodePoint = 0xffff;

// Largest code point the encoder accepts; anything above it is rejected.
constexpr uint32_t kMaxCodePoint = 0x10ffff;
32 
33 }  // namespace
34 
Utf8ForCodePoint(uint64_t code_point)35 Utf8ForCodePoint::Utf8ForCodePoint(uint64_t code_point) {
36   if (code_point <= kMax1ByteCodePoint) {
37     length = 1;
38     bytes[0] = static_cast<char>(code_point);
39     return;
40   }
41 
42   if (code_point <= kMax2ByteCodePoint) {
43     length = 2;
44     bytes[0] = static_cast<char>(0xc0 | (code_point >> 6));
45     bytes[1] = static_cast<char>(0x80 | (code_point & 0x3f));
46     return;
47   }
48 
49   if (kMinSurrogate <= code_point && code_point <= kMaxSurrogate) return;
50 
51   if (code_point <= kMax3ByteCodePoint) {
52     length = 3;
53     bytes[0] = static_cast<char>(0xe0 | (code_point >> 12));
54     bytes[1] = static_cast<char>(0x80 | ((code_point >> 6) & 0x3f));
55     bytes[2] = static_cast<char>(0x80 | (code_point & 0x3f));
56     return;
57   }
58 
59   if (code_point > kMaxCodePoint) return;
60 
61   length = 4;
62   bytes[0] = static_cast<char>(0xf0 | (code_point >> 18));
63   bytes[1] = static_cast<char>(0x80 | ((code_point >> 12) & 0x3f));
64   bytes[2] = static_cast<char>(0x80 | ((code_point >> 6) & 0x3f));
65   bytes[3] = static_cast<char>(0x80 | (code_point & 0x3f));
66 }
67 
68 }  // namespace debugging_internal
69 ABSL_NAMESPACE_END
70 }  // namespace absl
71