• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright 2024 The Abseil Authors
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //     https://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 #include "absl/debugging/internal/utf8_for_code_point.h"
16 
17 #include <cstdint>
18 
19 #include "gtest/gtest.h"
20 #include "absl/base/config.h"
21 
22 namespace absl {
23 ABSL_NAMESPACE_BEGIN
24 namespace debugging_internal {
25 namespace {
26 
// Code point 0 is expected to encode as one byte holding NUL.
TEST(Utf8ForCodePointTest, RecognizesTheSmallestCodePoint) {
  const Utf8ForCodePoint encoded(uint64_t{0});
  // Fatal check: the byte comparison below is only meaningful for length 1.
  ASSERT_EQ(encoded.length, 1);
  EXPECT_EQ(encoded.bytes[0], '\0');
}
32 
// The code point of ASCII 'a' is expected to encode as the single byte 'a'.
TEST(Utf8ForCodePointTest, RecognizesAsciiSmallA) {
  const Utf8ForCodePoint encoded(uint64_t{'a'});
  // Fatal check: the byte comparison below is only meaningful for length 1.
  ASSERT_EQ(encoded.length, 1);
  EXPECT_EQ(encoded.bytes[0], 'a');
}
38 
// 0x7f is the upper boundary of the one-byte range; it is expected to encode
// as the single byte 0x7f.
TEST(Utf8ForCodePointTest, RecognizesTheLargestOneByteCodePoint) {
  const Utf8ForCodePoint encoded(uint64_t{0x7f});
  // Fatal check: the byte comparison below is only meaningful for length 1.
  ASSERT_EQ(encoded.length, 1);
  EXPECT_EQ(encoded.bytes[0], '\x7f');
}
44 
// 0x80 is the lower boundary of the two-byte range; the expected encoding is
// the byte pair C2 80.
TEST(Utf8ForCodePointTest, RecognizesTheSmallestTwoByteCodePoint) {
  const char want[] = {static_cast<char>(0xc2), static_cast<char>(0x80)};
  const Utf8ForCodePoint encoded(uint64_t{0x80});
  // Fatal check: stop before reading bytes if the length is not 2.
  ASSERT_EQ(encoded.length, 2);
  EXPECT_EQ(encoded.bytes[0], want[0]);
  EXPECT_EQ(encoded.bytes[1], want[1]);
}
51 
// Code point 0xf1 is expected to encode to the same two bytes as the string
// literal below.
TEST(Utf8ForCodePointTest, RecognizesSmallNWithTilde) {
  const char expected[] = "ñ";
  const Utf8ForCodePoint encoded(uint64_t{0xf1});
  // Fatal check: stop before reading bytes if the length is not 2.
  ASSERT_EQ(encoded.length, 2);
  EXPECT_EQ(encoded.bytes[0], expected[0]);
  EXPECT_EQ(encoded.bytes[1], expected[1]);
}
59 
// Code point 0x3a0 is expected to encode to the same two bytes as the string
// literal below.
TEST(Utf8ForCodePointTest, RecognizesCapitalPi) {
  const char expected[] = "Π";
  const Utf8ForCodePoint encoded(uint64_t{0x3a0});
  // Fatal check: stop before reading bytes if the length is not 2.
  ASSERT_EQ(encoded.length, 2);
  EXPECT_EQ(encoded.bytes[0], expected[0]);
  EXPECT_EQ(encoded.bytes[1], expected[1]);
}
67 
// 0x7ff is the upper boundary of the two-byte range; the expected encoding is
// the byte pair DF BF.
TEST(Utf8ForCodePointTest, RecognizesTheLargestTwoByteCodePoint) {
  const char want[] = {static_cast<char>(0xdf), static_cast<char>(0xbf)};
  const Utf8ForCodePoint encoded(uint64_t{0x7ff});
  // Fatal check: stop before reading bytes if the length is not 2.
  ASSERT_EQ(encoded.length, 2);
  EXPECT_EQ(encoded.bytes[0], want[0]);
  EXPECT_EQ(encoded.bytes[1], want[1]);
}
74 
// 0x800 is the lower boundary of the three-byte range; the expected encoding
// is E0 A0 80.
TEST(Utf8ForCodePointTest, RecognizesTheSmallestThreeByteCodePoint) {
  const char want[] = {static_cast<char>(0xe0), static_cast<char>(0xa0),
                       static_cast<char>(0x80)};
  const Utf8ForCodePoint encoded(uint64_t{0x800});
  // Fatal check: stop before reading bytes if the length is not 3.
  ASSERT_EQ(encoded.length, 3);
  EXPECT_EQ(encoded.bytes[0], want[0]);
  EXPECT_EQ(encoded.bytes[1], want[1]);
  EXPECT_EQ(encoded.bytes[2], want[2]);
}
82 
// Code point 0x4e2d is expected to encode to the same three bytes as the
// string literal below.
TEST(Utf8ForCodePointTest, RecognizesTheChineseCharacterZhong1AsInZhong1Wen2) {
  const char expected[] = "中";
  const Utf8ForCodePoint encoded(uint64_t{0x4e2d});
  // Fatal check: stop before reading bytes if the length is not 3.
  ASSERT_EQ(encoded.length, 3);
  EXPECT_EQ(encoded.bytes[0], expected[0]);
  EXPECT_EQ(encoded.bytes[1], expected[1]);
  EXPECT_EQ(encoded.bytes[2], expected[2]);
}
91 
// 0xd7ff sits immediately below the value the surrogate tests reject
// (0xd800); it must still be accepted, with expected encoding ED 9F BF.
TEST(Utf8ForCodePointTest, RecognizesOneBeforeTheSmallestSurrogate) {
  const char want[] = {static_cast<char>(0xed), static_cast<char>(0x9f),
                       static_cast<char>(0xbf)};
  const Utf8ForCodePoint encoded(uint64_t{0xd7ff});
  // Fatal check: stop before reading bytes if the length is not 3.
  ASSERT_EQ(encoded.length, 3);
  EXPECT_EQ(encoded.bytes[0], want[0]);
  EXPECT_EQ(encoded.bytes[1], want[1]);
  EXPECT_EQ(encoded.bytes[2], want[2]);
}
99 
// 0xd800 is the low end of the surrogate range; this test treats a reported
// length of 0 as the rejection signal.
TEST(Utf8ForCodePointTest, RejectsTheSmallestSurrogate) {
  const Utf8ForCodePoint rejected(uint64_t{0xd800});
  EXPECT_EQ(rejected.length, 0);
}
104 
// 0xdfff is the high end of the surrogate range; this test treats a reported
// length of 0 as the rejection signal.
TEST(Utf8ForCodePointTest, RejectsTheLargestSurrogate) {
  const Utf8ForCodePoint rejected(uint64_t{0xdfff});
  EXPECT_EQ(rejected.length, 0);
}
109 
// 0xe000 sits immediately above the value the surrogate tests reject
// (0xdfff); it must be accepted again, with expected encoding EE 80 80.
TEST(Utf8ForCodePointTest, RecognizesOnePastTheLargestSurrogate) {
  const char want[] = {static_cast<char>(0xee), static_cast<char>(0x80),
                       static_cast<char>(0x80)};
  const Utf8ForCodePoint encoded(uint64_t{0xe000});
  // Fatal check: stop before reading bytes if the length is not 3.
  ASSERT_EQ(encoded.length, 3);
  EXPECT_EQ(encoded.bytes[0], want[0]);
  EXPECT_EQ(encoded.bytes[1], want[1]);
  EXPECT_EQ(encoded.bytes[2], want[2]);
}
117 
// 0xffff is the upper boundary of the three-byte range; the expected encoding
// is EF BF BF.
TEST(Utf8ForCodePointTest, RecognizesTheLargestThreeByteCodePoint) {
  const char want[] = {static_cast<char>(0xef), static_cast<char>(0xbf),
                       static_cast<char>(0xbf)};
  const Utf8ForCodePoint encoded(uint64_t{0xffff});
  // Fatal check: stop before reading bytes if the length is not 3.
  ASSERT_EQ(encoded.length, 3);
  EXPECT_EQ(encoded.bytes[0], want[0]);
  EXPECT_EQ(encoded.bytes[1], want[1]);
  EXPECT_EQ(encoded.bytes[2], want[2]);
}
125 
// 0x10000 is the lower boundary of the four-byte range; the expected encoding
// is F0 90 80 80.
TEST(Utf8ForCodePointTest, RecognizesTheSmallestFourByteCodePoint) {
  const char want[] = {static_cast<char>(0xf0), static_cast<char>(0x90),
                       static_cast<char>(0x80), static_cast<char>(0x80)};
  const Utf8ForCodePoint encoded(uint64_t{0x10000});
  // Fatal check: stop before reading bytes if the length is not 4.
  ASSERT_EQ(encoded.length, 4);
  EXPECT_EQ(encoded.bytes[0], want[0]);
  EXPECT_EQ(encoded.bytes[1], want[1]);
  EXPECT_EQ(encoded.bytes[2], want[2]);
  EXPECT_EQ(encoded.bytes[3], want[3]);
}
134 
// Code point 0x1f0bb (the playing-card Jack of Hearts) lies outside the Basic
// Multilingual Plane and is expected to encode as the four bytes F0 9F 82 BB.
TEST(Utf8ForCodePointTest, RecognizesTheJackOfHearts) {
  Utf8ForCodePoint utf8(uint64_t{0x1f0bb});
  // Fatal check: stop before reading bytes if the length is not 4.
  ASSERT_EQ(utf8.length, 4);
  // The expected bytes are written as hex escapes rather than as the literal
  // character: a tool that cannot represent U+1F0BB replaces it with U+FFFD
  // (bytes EF BF BD), which would make every comparison below wrong.
  const char* want = "\xf0\x9f\x82\xbb";
  EXPECT_EQ(utf8.bytes[0], want[0]);
  EXPECT_EQ(utf8.bytes[1], want[1]);
  EXPECT_EQ(utf8.bytes[2], want[2]);
  EXPECT_EQ(utf8.bytes[3], want[3]);
}
144 
// 0x10ffff is the largest value this suite expects to be accepted (0x110000
// is rejected elsewhere); the expected encoding is F4 8F BF BF.
TEST(Utf8ForCodePointTest, RecognizesTheLargestFourByteCodePoint) {
  const char want[] = {static_cast<char>(0xf4), static_cast<char>(0x8f),
                       static_cast<char>(0xbf), static_cast<char>(0xbf)};
  const Utf8ForCodePoint encoded(uint64_t{0x10ffff});
  // Fatal check: stop before reading bytes if the length is not 4.
  ASSERT_EQ(encoded.length, 4);
  EXPECT_EQ(encoded.bytes[0], want[0]);
  EXPECT_EQ(encoded.bytes[1], want[1]);
  EXPECT_EQ(encoded.bytes[2], want[2]);
  EXPECT_EQ(encoded.bytes[3], want[3]);
}
153 
// 0x110000 is one past the largest accepted value; this test treats a
// reported length of 0 as the rejection signal.
TEST(Utf8ForCodePointTest, RejectsTheSmallestOverlargeCodePoint) {
  const Utf8ForCodePoint rejected(uint64_t{0x110000});
  EXPECT_EQ(rejected.length, 0);
}
158 
// Only the upper 32 bits of this input are set, so the low 32 bits are all
// zero; the value must still be rejected (length 0).
TEST(Utf8ForCodePointTest, RejectsAThroughlyOverlargeCodePoint) {
  const Utf8ForCodePoint rejected(uint64_t{0xffffffff00000000});
  EXPECT_EQ(rejected.length, 0);
}
163 
// ok() is expected to report success for code point 0, which the encoding
// tests in this file accept.
TEST(Utf8ForCodePointTest, OkReturnsTrueForAValidCodePoint) {
  Utf8ForCodePoint encoded(uint64_t{0});
  EXPECT_TRUE(encoded.ok());
}
167 
// ok() is expected to report failure for an input that the rejection tests
// in this file expect to produce length 0.
TEST(Utf8ForCodePointTest, OkReturnsFalseForAnInvalidCodePoint) {
  Utf8ForCodePoint rejected(uint64_t{0xffffffff00000000});
  EXPECT_FALSE(rejected.ok());
}
171 
172 }  // namespace
173 }  // namespace debugging_internal
174 ABSL_NAMESPACE_END
175 }  // namespace absl
176