• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (C) 2018 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include "utils/utf8/unicodetext.h"
18 
19 #include "utils/strings/stringpiece.h"
20 #include "gtest/gtest.h"
21 
22 namespace libtextclassifier3 {
23 namespace {
24 
25 class UnicodeTextTest : public testing::Test {
26  protected:
UnicodeTextTest()27   UnicodeTextTest() : empty_text_() {
28     text_.push_back(0x1C0);
29     text_.push_back(0x4E8C);
30     text_.push_back(0xD7DB);
31     text_.push_back(0x34);
32     text_.push_back(0x1D11E);
33   }
34 
35   UnicodeText empty_text_;
36   UnicodeText text_;
37 };
38 
// Checks that constructing a UnicodeText from another UnicodeText, both with
// the default arguments and with do_copy=false, yields the same UTF-8 string.
TEST(UnicodeTextTest, ConstructionFromUnicodeText) {
  // "1234" + U+1F60B (FACE SAVOURING DELICIOUS FOOD) + "hello". The emoji is
  // written as explicit UTF-8 bytes so the literal cannot be corrupted by
  // tools that mishandle non-ASCII source characters.
  static constexpr char kUtf8[] = "1234" "\xF0\x9F\x98\x8B" "hello";

  UnicodeText text = UTF8ToUnicodeText(kUtf8, /*do_copy=*/false);
  EXPECT_EQ(UnicodeText(text).ToUTF8String(), kUtf8);
  EXPECT_EQ(UnicodeText(text, /*do_copy=*/false).ToUTF8String(), kUtf8);
}
44 
45 // Tests for our modifications of UnicodeText.
TEST(UnicodeTextTest,Custom)46 TEST(UnicodeTextTest, Custom) {
47   UnicodeText text = UTF8ToUnicodeText("1234��hello", /*do_copy=*/false);
48   EXPECT_EQ(text.ToUTF8String(), "1234��hello");
49   EXPECT_EQ(text.size_codepoints(), 10);
50   EXPECT_EQ(text.size_bytes(), 13);
51 
52   auto it_begin = text.begin();
53   std::advance(it_begin, 4);
54   auto it_end = text.begin();
55   std::advance(it_end, 6);
56   EXPECT_EQ(text.UTF8Substring(it_begin, it_end), "��h");
57 }
58 
// Same checks as the Custom test, but the UnicodeText is built from a
// StringPiece over a std::string instead of from a string literal.
TEST(UnicodeTextTest, StringPieceView) {
  // "1234" + U+1F60B (FACE SAVOURING DELICIOUS FOOD) + "hello". The emoji is
  // written as explicit UTF-8 bytes so the literal cannot be corrupted by
  // tools that mishandle non-ASCII source characters.
  std::string raw_text = "1234" "\xF0\x9F\x98\x8B" "hello";
  UnicodeText text =
      UTF8ToUnicodeText(StringPiece(raw_text), /*do_copy=*/false);
  EXPECT_EQ(text.ToUTF8String(), raw_text);
  EXPECT_EQ(text.size_codepoints(), 10);  // 4 digits + 1 emoji + 5 letters.
  EXPECT_EQ(text.size_bytes(), 13);       // 4 + 4 + 5 bytes.

  // Codepoints [4, 6) are the emoji followed by 'h'.
  auto it_begin = text.begin();
  std::advance(it_begin, 4);
  auto it_end = text.begin();
  std::advance(it_end, 6);
  EXPECT_EQ(text.UTF8Substring(it_begin, it_end), "\xF0\x9F\x98\x8B" "h");
}
73 
// Exercises both UnicodeText::Substring overloads (iterator pair and
// codepoint indices), each with do_copy set to true and to false.
TEST(UnicodeTextTest, Substring) {
  // "1234" + U+1F60B (FACE SAVOURING DELICIOUS FOOD) + "hello". The emoji is
  // written as explicit UTF-8 bytes so the literals cannot be corrupted by
  // tools that mishandle non-ASCII source characters.
  UnicodeText text =
      UTF8ToUnicodeText("1234" "\xF0\x9F\x98\x8B" "hello", /*do_copy=*/false);

  // Codepoints [4, 6): the emoji followed by 'h'.
  static constexpr char kExpected[] = "\xF0\x9F\x98\x8B" "h";

  EXPECT_EQ(
      UnicodeText::Substring(std::next(text.begin(), 4),
                             std::next(text.begin(), 6), /*do_copy=*/true),
      UTF8ToUnicodeText(kExpected));
  EXPECT_EQ(
      UnicodeText::Substring(std::next(text.begin(), 4),
                             std::next(text.begin(), 6), /*do_copy=*/false),
      UTF8ToUnicodeText(kExpected));
  EXPECT_EQ(UnicodeText::Substring(text, 4, 6, /*do_copy=*/true),
            UTF8ToUnicodeText(kExpected));
  EXPECT_EQ(UnicodeText::Substring(text, 4, 6, /*do_copy=*/false),
            UTF8ToUnicodeText(kExpected));
}
90 
// PointToUTF8 must alias the caller's buffer, while copy initialization from
// such an alias must end up with a different data pointer.
TEST(UnicodeTextTest, Ownership) {
  const std::string utf8_source = "\u304A\u00B0\u106B";

  UnicodeText view;
  view.PointToUTF8(utf8_source.data(), utf8_source.size());
  // The alias exposes the very same bytes as the source string.
  EXPECT_EQ(view.data(), utf8_source.data());

  // Walk all three codepoints and make sure nothing follows them.
  UnicodeText::const_iterator cursor = view.begin();
  EXPECT_EQ(*cursor++, 0x304A);
  EXPECT_EQ(*cursor++, 0x00B0);
  EXPECT_EQ(*cursor++, 0x106B);
  EXPECT_EQ(cursor, view.end());

  // Copy initialization copies the data.
  UnicodeText owned_copy = view;
  EXPECT_NE(owned_copy.data(), view.data());
}
106 
// is_valid() must accept well-formed UTF-8 and reject sequences that are
// truncated or that carry the wrong number of continuation bytes.
TEST(UnicodeTextTest, Validation) {
  // U+1F60B (FACE SAVOURING DELICIOUS FOOD) is written as explicit UTF-8
  // bytes throughout so the literals cannot be corrupted by tools that
  // mishandle non-ASCII source characters.
  EXPECT_TRUE(
      UTF8ToUnicodeText("1234" "\xF0\x9F\x98\x8B" "hello", /*do_copy=*/false)
          .is_valid());
  EXPECT_TRUE(
      UTF8ToUnicodeText("\u304A\u00B0\u106B", /*do_copy=*/false).is_valid());
  // Several four-byte characters back to back at the end of the text.
  EXPECT_TRUE(UTF8ToUnicodeText("this is a test"
                                "\xF0\x9F\x98\x8B"
                                "\xF0\x9F\x98\x8B"
                                "\xF0\x9F\x98\x8B",
                                /*do_copy=*/false)
                  .is_valid());
  EXPECT_TRUE(
      UTF8ToUnicodeText("\xf0\x9f\x98\x8b", /*do_copy=*/false).is_valid());
  // Too short (string is too short).
  EXPECT_FALSE(UTF8ToUnicodeText("\xf0\x9f", /*do_copy=*/false).is_valid());
  // Too long (too many trailing bytes).
  EXPECT_FALSE(
      UTF8ToUnicodeText("\xf0\x9f\x98\x8b\x8b", /*do_copy=*/false).is_valid());
  // Too short (too few trailing bytes).
  EXPECT_FALSE(
      UTF8ToUnicodeText("\xf0\x9f\x98\x61\x61", /*do_copy=*/false).is_valid());
  // Invalid with context.
  EXPECT_FALSE(
      UTF8ToUnicodeText("hello \xf0\x9f\x98\x61\x61 world1", /*do_copy=*/false)
          .is_valid());
}
128 
129 class IteratorTest : public UnicodeTextTest {};
130 
// Walks text_ front to back with a mix of pre- and post-increment, checking
// every codepoint the fixture appended and that the walk stops at end().
TEST_F(IteratorTest, Iterates) {
  UnicodeText::const_iterator cursor = text_.begin();
  EXPECT_EQ(0x1C0, *cursor);
  // operator++ returns *this, so the addresses must match.
  EXPECT_EQ(&cursor, &++cursor);
  EXPECT_EQ(0x4E8C, *cursor++);
  // Dereferencing the same position twice must give the same codepoint.
  EXPECT_EQ(0xD7DB, *cursor);
  EXPECT_EQ(0xD7DB, *cursor);
  EXPECT_EQ(0x34, *++cursor);
  EXPECT_EQ(0x1D11E, *++cursor);
  // One element is left before the end.
  ASSERT_TRUE(cursor != text_.end());
  cursor++;
  EXPECT_TRUE(cursor == text_.end());
}
145 
// Two iterators over the same text advance independently of each other.
// Also covers default construction and assignment of const_iterator.
TEST_F(IteratorTest, MultiPass) {
  UnicodeText::const_iterator leader, follower;
  leader = text_.begin();
  follower = leader;

  // Moving the leader must not drag the follower along.
  EXPECT_EQ(0x4E8C, *++leader);
  EXPECT_TRUE(leader != follower);
  EXPECT_EQ(0x1C0, *follower);

  // Once the follower catches up, both compare equal and read the same value.
  ++follower;
  EXPECT_TRUE(leader == follower);
  EXPECT_EQ(0x4E8C, *follower);
}
158 
// Walks text_ back to front with a mix of pre- and post-decrement, checking
// every codepoint the fixture appended and that the walk stops at begin().
TEST_F(IteratorTest, ReverseIterates) {
  UnicodeText::const_iterator cursor = text_.end();
  EXPECT_TRUE(cursor == text_.end());
  cursor--;
  ASSERT_TRUE(cursor != text_.end());
  EXPECT_EQ(0x1D11E, *cursor--);
  EXPECT_EQ(0x34, *cursor);
  EXPECT_EQ(0xD7DB, *--cursor);
  // Dereferencing the same position twice must give the same codepoint.
  EXPECT_EQ(0xD7DB, *cursor);
  --cursor;
  EXPECT_EQ(0x4E8C, *cursor--);
  EXPECT_EQ(0x1C0, *cursor);
  // The first codepoint sits exactly at begin().
  EXPECT_TRUE(cursor == text_.begin());
}
174 
// Relational operators on const_iterator follow position within the text.
TEST_F(IteratorTest, Comparable) {
  UnicodeText::const_iterator earlier, later;
  earlier = text_.begin();
  later = earlier;
  ++later;  // `later` now sits one codepoint past `earlier`.

  EXPECT_TRUE(earlier < later);
  EXPECT_TRUE(text_.begin() <= earlier);
  EXPECT_FALSE(earlier >= later);
  EXPECT_FALSE(earlier > text_.end());
}
186 
// std::advance moves a const_iterator by whole codepoints.
TEST_F(IteratorTest, Advance) {
  UnicodeText::const_iterator cursor = text_.begin();
  EXPECT_EQ(0x1C0, *cursor);

  // Four steps from the first codepoint land on the fifth (and last) one.
  std::advance(cursor, 4);
  EXPECT_EQ(0x1D11E, *cursor);

  ++cursor;
  EXPECT_TRUE(cursor == text_.end());
}
195 
// std::distance between const_iterators counts codepoints.
TEST_F(IteratorTest, Distance) {
  UnicodeText::const_iterator cursor = text_.begin();
  EXPECT_EQ(0, std::distance(text_.begin(), cursor));
  EXPECT_EQ(5, std::distance(cursor, text_.end()));

  // After two steps: two codepoints behind, three ahead.
  ++cursor;
  ++cursor;
  EXPECT_EQ(2, std::distance(text_.begin(), cursor));
  EXPECT_EQ(3, std::distance(cursor, text_.end()));

  // After four steps in total.
  ++cursor;
  ++cursor;
  EXPECT_EQ(4, std::distance(text_.begin(), cursor));

  // After the fifth step nothing is left before end().
  ++cursor;
  EXPECT_EQ(0, std::distance(cursor, text_.end()));
}
210 
211 class OperatorTest : public UnicodeTextTest {};
212 
// After clear(), a populated text compares equal to one built from "".
TEST_F(OperatorTest, Clear) {
  UnicodeText blank(UTF8ToUnicodeText("", /*do_copy=*/false));

  // The fixture's text_ starts out populated, so it differs from blank.
  EXPECT_FALSE(text_ == blank);

  text_.clear();
  EXPECT_TRUE(text_ == blank);
}
219 
// empty() is false for a populated text and true for a default-constructed or
// cleared one.
TEST_F(OperatorTest, Empty) {
  // The populated fixture text is not empty...
  EXPECT_FALSE(text_.empty());
  // ...while the default-constructed one is.
  EXPECT_TRUE(empty_text_.empty());

  // Clearing flips the populated text to empty.
  text_.clear();
  EXPECT_TRUE(text_.empty());
}
226 
227 }  // namespace
228 }  // namespace libtextclassifier3
229