1 // Copyright (C) 2019 Google LLC
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14
15 #include "icing/util/i18n-utils.h"
16
17 #include <memory>
18
19 #include "gmock/gmock.h"
20 #include "gtest/gtest.h"
21 #include "unicode/uchar.h"
22
23 namespace icing {
24 namespace lib {
25 namespace {
26
27 using ::testing::Eq;
28
// Checks that i18n_utils::IsPunctuationAt gives the same answer as ICU's
// u_ispunct for every 7-bit ASCII value (0 through 127 inclusive).
TEST(IcuI18nUtilsTest, IsPunctuationAtSameAsIcuIsPunct) {
  constexpr int kLastAscii = 127;
  for (int code_point = 0; code_point <= kLastAscii; ++code_point) {
    const char ch = code_point;
    // One-character string holding only the value under test.
    const std::string single_char(1, ch);

    EXPECT_EQ(i18n_utils::IsPunctuationAt(single_char, /*position=*/0),
              u_ispunct(ch));
  }
}
42
// Exercises i18n_utils::IsAlphabeticAt on ASCII and multi-byte UTF-8 input.
// Positions are byte offsets into the string, so each multi-byte check is
// paired with a GetUtf8Length assertion that justifies the next offset.
TEST(IcuI18nUtilsTest, IsAlphabeticAt) {
  // Test alphabetic and non-alphabetic ascii characters
  constexpr std::string_view kSomeAscii = "iJ?9";
  EXPECT_TRUE(i18n_utils::IsAlphabeticAt(kSomeAscii, /*position=*/0));   // 'i'
  EXPECT_TRUE(i18n_utils::IsAlphabeticAt(kSomeAscii, /*position=*/1));   // 'J'
  EXPECT_FALSE(i18n_utils::IsAlphabeticAt(kSomeAscii, /*position=*/2));  // '?'
  EXPECT_FALSE(i18n_utils::IsAlphabeticAt(kSomeAscii, /*position=*/3));  // '9'

  // Byte layout: [0,4) U+1F600, [4,6) 'ñ', [6,9) '①', [9,12) 'カ'.
  // The leading 4-byte code point is spelled as explicit UTF-8 bytes so it
  // cannot be silently dropped by tooling that mishandles non-BMP characters;
  // without it every offset below is wrong and offset 9 is out of range.
  // NOTE(review): the original leading character was lost; any non-alphabetic
  // 4-byte code point satisfies these assertions - confirm the intended one.
  constexpr std::string_view kSomeNonAscii =
      "\xF0\x9F\x98\x80"
      "ñ①カ";
  EXPECT_FALSE(
      i18n_utils::IsAlphabeticAt(kSomeNonAscii, /*position=*/0));  // U+1F600
  EXPECT_EQ(i18n_utils::GetUtf8Length(i18n_utils::GetUChar32At(
                kSomeNonAscii.data(), kSomeNonAscii.length(), 0)),
            4);
  EXPECT_TRUE(
      i18n_utils::IsAlphabeticAt(kSomeNonAscii, /*position=*/4));  // 'ñ'
  EXPECT_EQ(i18n_utils::GetUtf8Length(i18n_utils::GetUChar32At(
                kSomeNonAscii.data(), kSomeNonAscii.length(), 4)),
            2);
  EXPECT_FALSE(
      i18n_utils::IsAlphabeticAt(kSomeNonAscii, /*position=*/6));  // '①'
  EXPECT_EQ(i18n_utils::GetUtf8Length(i18n_utils::GetUChar32At(
                kSomeNonAscii.data(), kSomeNonAscii.length(), 6)),
            3);
  EXPECT_TRUE(
      i18n_utils::IsAlphabeticAt(kSomeNonAscii, /*position=*/9));  // 'カ'
  EXPECT_EQ(i18n_utils::GetUtf8Length(i18n_utils::GetUChar32At(
                kSomeNonAscii.data(), kSomeNonAscii.length(), 9)),
            3);
}
73
// Checks i18n_utils::GetUtf8Length for code points that take 1, 2, 3 and 4
// bytes in UTF-8. Each code point is decoded with GetUChar32At from a byte
// offset into the test string.
TEST(IcuI18nUtilsTest, GetUtf8Length) {
  // Decodes the code point starting at byte offset `position` of `text` and
  // returns the UTF-8 length reported for it.
  const auto utf8_length_at = [](std::string_view text, int position) {
    return i18n_utils::GetUtf8Length(
        i18n_utils::GetUChar32At(text.data(), text.length(), position));
  };

  // Test alphabetic and non-alphabetic ascii characters
  constexpr std::string_view kSomeAscii = "iJ?9";
  EXPECT_EQ(utf8_length_at(kSomeAscii, 0), 1);  // 'i'
  EXPECT_EQ(utf8_length_at(kSomeAscii, 1), 1);  // 'J'
  EXPECT_EQ(utf8_length_at(kSomeAscii, 2), 1);  // '?'
  EXPECT_EQ(utf8_length_at(kSomeAscii, 3), 1);  // '9'

  // Byte layout: [0,4) U+1F600, [4,6) 'ñ', [6,9) '①', [9,12) 'カ'.
  // The leading 4-byte code point is spelled as explicit UTF-8 bytes so it
  // cannot be silently dropped by tooling that mishandles non-BMP characters;
  // without it every offset below is wrong and offset 9 is out of range.
  // NOTE(review): the original leading character was lost; any 4-byte code
  // point satisfies these assertions - confirm the intended one.
  constexpr std::string_view kSomeNonAscii =
      "\xF0\x9F\x98\x80"
      "ñ①カ";
  EXPECT_EQ(utf8_length_at(kSomeNonAscii, 0), 4);  // U+1F600
  EXPECT_EQ(utf8_length_at(kSomeNonAscii, 4), 2);  // 'ñ'
  EXPECT_EQ(utf8_length_at(kSomeNonAscii, 6), 3);  // '①'
  EXPECT_EQ(utf8_length_at(kSomeNonAscii, 9), 3);  // 'カ'
}
104
// Exercises i18n_utils::SafeTruncateUtf8 with limits above, at and below the
// input's byte length. The multi-byte expectations show that a limit falling
// inside a character drops that whole character instead of splitting it.
TEST(IcuI18nUtilsTest, SafeTruncate) {
  // Copies `text` into a fresh std::string, truncates the copy to the given
  // byte limit and returns it.
  const auto truncated_copy = [](std::string_view text, std::size_t limit) {
    std::string copy(text);
    i18n_utils::SafeTruncateUtf8(&copy, limit);
    return copy;
  };

  // Single-byte characters: truncation is a plain byte cut.
  constexpr std::string_view kSomeAscii = "iJ?9";
  const std::size_t ascii_len = kSomeAscii.length();
  EXPECT_THAT(truncated_copy(kSomeAscii, ascii_len + 1), Eq("iJ?9"));
  EXPECT_THAT(truncated_copy(kSomeAscii, ascii_len), Eq("iJ?9"));
  EXPECT_THAT(truncated_copy(kSomeAscii, ascii_len - 1), Eq("iJ?"));

  // Multi-byte characters: limits that land inside a character remove it.
  constexpr std::string_view kSomeNonAscii = "ñ①カ";
  const std::size_t non_ascii_len = kSomeNonAscii.length();
  EXPECT_THAT(truncated_copy(kSomeNonAscii, non_ascii_len + 1), Eq("ñ①カ"));
  EXPECT_THAT(truncated_copy(kSomeNonAscii, non_ascii_len), Eq("ñ①カ"));
  EXPECT_THAT(truncated_copy(kSomeNonAscii, non_ascii_len - 1), Eq("ñ①"));
  EXPECT_THAT(truncated_copy(kSomeNonAscii, non_ascii_len - 2), Eq("ñ①"));
  EXPECT_THAT(truncated_copy(kSomeNonAscii, non_ascii_len - 3), Eq("ñ①"));
  EXPECT_THAT(truncated_copy(kSomeNonAscii, non_ascii_len - 4), Eq("ñ"));
}
138
139 } // namespace
140 } // namespace lib
141 } // namespace icing
142