• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright (C) 2019 Google LLC
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //      http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 #include "icing/util/i18n-utils.h"
16 
17 #include <memory>
18 
19 #include "gmock/gmock.h"
20 #include "gtest/gtest.h"
21 #include "unicode/uchar.h"
22 
23 namespace icing {
24 namespace lib {
25 namespace {
26 
27 using ::testing::Eq;
28 
// Checks that i18n_utils::IsPunctuationAt gives the same answer as ICU's
// u_ispunct for every 7-bit ASCII value (0 through 127).
TEST(IcuI18nUtilsTest, IsPunctuationAtSameAsIcuIsPunct) {
  constexpr int kAsciiLimit = 128;
  for (int code = 0; code < kAsciiLimit; ++code) {
    const char ch = static_cast<char>(code);

    // A one-character string holding just this value (a lone NUL for 0).
    std::string single_char(1, ch);

    const auto actual =
        i18n_utils::IsPunctuationAt(single_char, /*position=*/0);
    const auto expected = u_ispunct(ch);
    EXPECT_EQ(actual, expected);
  }
}
42 
// Verifies i18n_utils::IsAlphabeticAt for the character that starts at a given
// byte offset, using both single-byte ASCII and multi-byte UTF-8 input. For the
// multi-byte input, the UTF-8 length of each character is also checked so that
// the byte offsets used by the following assertion are justified.
TEST(IcuI18nUtilsTest, IsAlphabeticAt) {
  // Test alphabetic and non-alphabetic ascii characters
  constexpr std::string_view kSomeAscii = "iJ?9";
  EXPECT_TRUE(i18n_utils::IsAlphabeticAt(kSomeAscii, /*position=*/0));   // 'i'
  EXPECT_TRUE(i18n_utils::IsAlphabeticAt(kSomeAscii, /*position=*/1));   // 'J'
  EXPECT_FALSE(i18n_utils::IsAlphabeticAt(kSomeAscii, /*position=*/2));  // '?'
  EXPECT_FALSE(i18n_utils::IsAlphabeticAt(kSomeAscii, /*position=*/3));  // '9'

  // Byte layout of the multi-byte sample:
  //   offset 0: U+1F639 (4 bytes, a symbol, not alphabetic)
  //   offset 4: 'ñ'     (2 bytes)
  //   offset 6: '①'     (3 bytes)
  //   offset 9: 'カ'     (3 bytes)
  // The leading character is written with hex escapes so that its UTF-8 bytes
  // survive any re-encoding of this file.
  // NOTE(review): the glyph originally used here was lost to an encoding error
  // (it had decayed into replacement characters, which breaks the offsets
  // below). U+1F639 is a stand-in that satisfies the same assertions; confirm
  // against the upstream file.
  constexpr std::string_view kSomeNonAscii =
      "\xF0\x9F\x98\xB9"
      "ñ①カ";
  EXPECT_FALSE(
      i18n_utils::IsAlphabeticAt(kSomeNonAscii, /*position=*/0));  // U+1F639
  EXPECT_EQ(i18n_utils::GetUtf8Length(i18n_utils::GetUChar32At(
                kSomeNonAscii.data(), kSomeNonAscii.length(), 0)),
            4);
  EXPECT_TRUE(
      i18n_utils::IsAlphabeticAt(kSomeNonAscii, /*position=*/4));  // 'ñ'
  EXPECT_EQ(i18n_utils::GetUtf8Length(i18n_utils::GetUChar32At(
                kSomeNonAscii.data(), kSomeNonAscii.length(), 4)),
            2);
  EXPECT_FALSE(
      i18n_utils::IsAlphabeticAt(kSomeNonAscii, /*position=*/6));  // '①'
  EXPECT_EQ(i18n_utils::GetUtf8Length(i18n_utils::GetUChar32At(
                kSomeNonAscii.data(), kSomeNonAscii.length(), 6)),
            3);
  EXPECT_TRUE(
      i18n_utils::IsAlphabeticAt(kSomeNonAscii, /*position=*/9));  // 'カ'
  EXPECT_EQ(i18n_utils::GetUtf8Length(i18n_utils::GetUChar32At(
                kSomeNonAscii.data(), kSomeNonAscii.length(), 9)),
            3);
}
73 
// Verifies that i18n_utils::GetUtf8Length reports the UTF-8 byte length of the
// code point that i18n_utils::GetUChar32At decodes at a given byte offset,
// covering 1-, 2-, 3- and 4-byte sequences.
TEST(IcuI18nUtilsTest, GetUtf8Length) {
  // Every ASCII character occupies exactly one byte.
  constexpr std::string_view kSomeAscii = "iJ?9";
  EXPECT_EQ(i18n_utils::GetUtf8Length(i18n_utils::GetUChar32At(
                kSomeAscii.data(), kSomeAscii.length(), 0)),
            1);  // 'i'
  EXPECT_EQ(i18n_utils::GetUtf8Length(i18n_utils::GetUChar32At(
                kSomeAscii.data(), kSomeAscii.length(), 1)),
            1);  // 'J'
  EXPECT_EQ(i18n_utils::GetUtf8Length(i18n_utils::GetUChar32At(
                kSomeAscii.data(), kSomeAscii.length(), 2)),
            1);  // '?'
  EXPECT_EQ(i18n_utils::GetUtf8Length(i18n_utils::GetUChar32At(
                kSomeAscii.data(), kSomeAscii.length(), 3)),
            1);  // '9'

  // Multi-byte sample; characters start at byte offsets 0, 4, 6 and 9.
  // The leading character is written with hex escapes so that its four UTF-8
  // bytes survive any re-encoding of this file.
  // NOTE(review): the glyph originally used here was lost to an encoding error
  // (it had decayed into replacement characters, which are not 4 bytes long
  // and shift every later offset). U+1F639 is a stand-in 4-byte code point;
  // confirm against the upstream file.
  constexpr std::string_view kSomeNonAscii =
      "\xF0\x9F\x98\xB9"
      "ñ①カ";
  EXPECT_EQ(i18n_utils::GetUtf8Length(i18n_utils::GetUChar32At(
                kSomeNonAscii.data(), kSomeNonAscii.length(), 0)),
            4);  // U+1F639
  EXPECT_EQ(i18n_utils::GetUtf8Length(i18n_utils::GetUChar32At(
                kSomeNonAscii.data(), kSomeNonAscii.length(), 4)),
            2);  // 'ñ'
  EXPECT_EQ(i18n_utils::GetUtf8Length(i18n_utils::GetUChar32At(
                kSomeNonAscii.data(), kSomeNonAscii.length(), 6)),
            3);  // '①'
  EXPECT_EQ(i18n_utils::GetUtf8Length(i18n_utils::GetUChar32At(
                kSomeNonAscii.data(), kSomeNonAscii.length(), 9)),
            3);  // 'カ'
}
104 
// Verifies that i18n_utils::SafeTruncateUtf8 shortens a string to at most the
// requested number of bytes without leaving a partial UTF-8 sequence at the
// end, and leaves the string alone when the limit is not smaller than it.
TEST(IcuI18nUtilsTest, SafeTruncate) {
  // ASCII: every byte is a character boundary, so truncation is exact.
  constexpr std::string_view kSomeAscii = "iJ?9";
  std::string truncated(kSomeAscii);
  i18n_utils::SafeTruncateUtf8(&truncated, kSomeAscii.length() + 1);
  EXPECT_THAT(truncated, Eq("iJ?9"));
  truncated = kSomeAscii;
  i18n_utils::SafeTruncateUtf8(&truncated, kSomeAscii.length());
  EXPECT_THAT(truncated, Eq("iJ?9"));
  truncated = kSomeAscii;
  i18n_utils::SafeTruncateUtf8(&truncated, kSomeAscii.length() - 1);
  EXPECT_THAT(truncated, Eq("iJ?"));

  // Multi-byte sample, 12 bytes in total:
  //   U+1F639 (bytes 0-3), 'ñ' (bytes 4-5), '①' (bytes 6-8), 'カ' (bytes 9-11).
  // The leading character is written with hex escapes so that its UTF-8 bytes
  // survive any re-encoding of this file.
  // NOTE(review): the glyph originally used here was lost to an encoding error
  // (it had decayed into replacement characters). U+1F639 is a stand-in 4-byte
  // code point matching the layout the sibling tests rely on; confirm against
  // the upstream file.
  constexpr std::string_view kSomeNonAscii =
      "\xF0\x9F\x98\xB9"
      "ñ①カ";
  truncated = kSomeNonAscii;
  i18n_utils::SafeTruncateUtf8(&truncated, kSomeNonAscii.length() + 1);
  EXPECT_THAT(truncated, Eq("\xF0\x9F\x98\xB9"
                            "ñ①カ"));
  truncated = kSomeNonAscii;
  i18n_utils::SafeTruncateUtf8(&truncated, kSomeNonAscii.length());
  EXPECT_THAT(truncated, Eq("\xF0\x9F\x98\xB9"
                            "ñ①カ"));
  // Limits of 11 and 10 bytes fall inside 'カ', so it is dropped whole.
  truncated = kSomeNonAscii;
  i18n_utils::SafeTruncateUtf8(&truncated, kSomeNonAscii.length() - 1);
  EXPECT_THAT(truncated, Eq("\xF0\x9F\x98\xB9"
                            "ñ①"));
  truncated = kSomeNonAscii;
  i18n_utils::SafeTruncateUtf8(&truncated, kSomeNonAscii.length() - 2);
  EXPECT_THAT(truncated, Eq("\xF0\x9F\x98\xB9"
                            "ñ①"));
  // A limit of 9 bytes lands exactly on the boundary before 'カ'.
  truncated = kSomeNonAscii;
  i18n_utils::SafeTruncateUtf8(&truncated, kSomeNonAscii.length() - 3);
  EXPECT_THAT(truncated, Eq("\xF0\x9F\x98\xB9"
                            "ñ①"));
  // A limit of 8 bytes falls inside '①', so it is dropped as well.
  truncated = kSomeNonAscii;
  i18n_utils::SafeTruncateUtf8(&truncated, kSomeNonAscii.length() - 4);
  EXPECT_THAT(truncated, Eq("\xF0\x9F\x98\xB9"
                            "ñ"));
}
138 
139 }  // namespace
140 }  // namespace lib
141 }  // namespace icing
142