1 // Copyright 2024 The Abseil Authors 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // https://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 #include "absl/debugging/internal/utf8_for_code_point.h" 16 17 #include <cstdint> 18 19 #include "absl/base/config.h" 20 21 namespace absl { 22 ABSL_NAMESPACE_BEGIN 23 namespace debugging_internal { 24 namespace { 25 26 // UTF-8 encoding bounds. 27 constexpr uint32_t kMinSurrogate = 0xd800, kMaxSurrogate = 0xdfff; 28 constexpr uint32_t kMax1ByteCodePoint = 0x7f; 29 constexpr uint32_t kMax2ByteCodePoint = 0x7ff; 30 constexpr uint32_t kMax3ByteCodePoint = 0xffff; 31 constexpr uint32_t kMaxCodePoint = 0x10ffff; 32 33 } // namespace 34 Utf8ForCodePoint(uint64_t code_point)35Utf8ForCodePoint::Utf8ForCodePoint(uint64_t code_point) { 36 if (code_point <= kMax1ByteCodePoint) { 37 length = 1; 38 bytes[0] = static_cast<char>(code_point); 39 return; 40 } 41 42 if (code_point <= kMax2ByteCodePoint) { 43 length = 2; 44 bytes[0] = static_cast<char>(0xc0 | (code_point >> 6)); 45 bytes[1] = static_cast<char>(0x80 | (code_point & 0x3f)); 46 return; 47 } 48 49 if (kMinSurrogate <= code_point && code_point <= kMaxSurrogate) return; 50 51 if (code_point <= kMax3ByteCodePoint) { 52 length = 3; 53 bytes[0] = static_cast<char>(0xe0 | (code_point >> 12)); 54 bytes[1] = static_cast<char>(0x80 | ((code_point >> 6) & 0x3f)); 55 bytes[2] = static_cast<char>(0x80 | (code_point & 0x3f)); 56 return; 57 } 58 59 if (code_point > kMaxCodePoint) return; 60 61 length = 4; 62 bytes[0] = static_cast<char>(0xf0 | (code_point >> 18)); 63 bytes[1] = static_cast<char>(0x80 | ((code_point >> 12) & 0x3f)); 64 bytes[2] = static_cast<char>(0x80 | ((code_point >> 6) & 0x3f)); 65 bytes[3] = static_cast<char>(0x80 | (code_point & 0x3f)); 66 } 67 68 } // namespace debugging_internal 69 ABSL_NAMESPACE_END 70 } // namespace absl 71