1 /*
2 * Copyright (C) 2010 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #define LOG_TAG "Unicode_test"
18 #include <utils/Log.h>
19 #include <utils/Unicode.h>
20
21 #include <gtest/gtest.h>
22
23 namespace android {
24
25 class UnicodeTest : public testing::Test {
26 protected:
SetUp()27 virtual void SetUp() {
28 }
29
TearDown()30 virtual void TearDown() {
31 }
32
33 char16_t const * const kSearchString = u"I am a leaf on the wind.";
34 };
35
// Checks that utf8_to_utf16_length() reports 0 when asked to measure zero
// bytes of input.
TEST_F(UnicodeTest, UTF8toUTF16ZeroLength) {
    // ISO C++ does not allow zero-sized arrays (`{ }` only builds as a
    // compiler extension), so provide a one-byte placeholder and pass an
    // explicit length of 0 instead.
    const uint8_t str[] = { 0 };

    const ssize_t measured = utf8_to_utf16_length(str, 0);
    EXPECT_EQ(0, measured)
            << "Zero length input should return zero length output.";
}
45
// A single ASCII byte is expected to measure as exactly one UTF-16 code unit.
TEST_F(UnicodeTest, UTF8toUTF16ASCIILength) {
    // UTF-8 encoding of U+0030 (ASCII '0').
    const uint8_t asciiZero[] = { 0x30 };

    const ssize_t utf16Length = utf8_to_utf16_length(asciiZero, sizeof(asciiZero));

    EXPECT_EQ(1, utf16Length)
            << "ASCII glyphs should have a length of 1 char16_t";
}
56
// A three-byte UTF-8 sequence is expected to measure as one UTF-16 code unit.
// NOTE(review): U+2323 is a Basic Multilingual Plane code point; the
// "Plane 1" wording in the test name and message is historical.
TEST_F(UnicodeTest, UTF8toUTF16Plane1Length) {
    // UTF-8 encoding of U+2323 SMILE.
    const uint8_t smile[] = { 0xE2, 0x8C, 0xA3 };

    const ssize_t utf16Length = utf8_to_utf16_length(smile, sizeof(smile));

    EXPECT_EQ(1, utf16Length)
            << "Plane 1 glyphs should have a length of 1 char16_t";
}
67
// A four-byte UTF-8 sequence is expected to measure as two UTF-16 code units
// (a surrogate pair).
TEST_F(UnicodeTest, UTF8toUTF16SurrogateLength) {
    // UTF-8 encoding of U+10000.
    const uint8_t supplementary[] = { 0xF0, 0x90, 0x80, 0x80 };

    const ssize_t utf16Length =
            utf8_to_utf16_length(supplementary, sizeof(supplementary));

    EXPECT_EQ(2, utf16Length)
            << "Surrogate pairs should have a length of 2 char16_t";
}
78
// An incomplete multi-byte sequence is expected to be rejected with -1.
TEST_F(UnicodeTest, UTF8toUTF16TruncatedUTF8) {
    // First two bytes of U+2323 SMILE (full encoding: E2 8C A3); the final
    // continuation byte is deliberately missing.
    const uint8_t truncatedSmile[] = { 0xE2, 0x8C };

    const ssize_t utf16Length =
            utf8_to_utf16_length(truncatedSmile, sizeof(truncatedSmile));

    EXPECT_EQ(-1, utf16Length)
            << "Truncated UTF-8 should return -1 to indicate invalid";
}
90
// Converts a buffer holding one-, two-, three- and four-byte UTF-8 sequences
// and checks every UTF-16 code unit written, including the terminator.
TEST_F(UnicodeTest, UTF8toUTF16Normal) {
    const uint8_t str[] = {
        0x30,                    // U+0030, 1 UTF-16 character
        0xC4, 0x80,              // U+0100, 1 UTF-16 character
        0xE2, 0x8C, 0xA3,        // U+2323, 1 UTF-16 character
        0xF0, 0x90, 0x80, 0x80,  // U+10000, 2 UTF-16 characters
    };

    char16_t output[1 + 1 + 1 + 2 + 1];  // Room for the terminating NUL

    // Pre-fill with a sentinel so the checks below never read indeterminate
    // values and the terminator check cannot pass by accident.
    for (char16_t& unit : output) {
        unit = 0xFFFF;
    }

    utf8_to_utf16(str, sizeof(str), output, sizeof(output) / sizeof(output[0]));

    EXPECT_EQ(0x0030, output[0])
            << "should be U+0030";
    EXPECT_EQ(0x0100, output[1])
            << "should be U+0100";
    EXPECT_EQ(0x2323, output[2])
            << "should be U+2323";
    EXPECT_EQ(0xD800, output[3])
            << "should be first half of surrogate U+10000";
    EXPECT_EQ(0xDC00, output[4])
            << "should be second half of surrogate U+10000";
    // Compare against a char16_t zero rather than the null-pointer macro NULL:
    // output[5] is a character, not a pointer.
    EXPECT_EQ(u'\0', output[5])
            << "should be NULL terminated";
}
116
// Searching for an empty target is expected to yield the start of the
// haystack.
TEST_F(UnicodeTest, strstr16EmptyTarget) {
    const char16_t* result = strstr16(kSearchString, u"");
    // Expected value first, matching the other strstr16 tests, so failure
    // output labels the operands consistently.
    EXPECT_EQ(kSearchString, result)
            << "should return the original pointer";
}
121
// Searching a string for itself is expected to match at its very beginning.
TEST_F(UnicodeTest, strstr16SameString) {
    const char16_t* const match = strstr16(kSearchString, kSearchString);

    EXPECT_EQ(kSearchString, match)
            << "should return the original pointer";
}
127
// A target that is a prefix of the haystack is expected to match at offset 0.
TEST_F(UnicodeTest, strstr16TargetStartOfString) {
    const char16_t* const match = strstr16(kSearchString, u"I am");

    EXPECT_EQ(kSearchString, match)
            << "should return the original pointer";
}
133
134
// A target that is a suffix of the haystack is expected to be found at its
// starting offset.
TEST_F(UnicodeTest, strstr16TargetEndOfString) {
    // "wind." begins at index 19 of "I am a leaf on the wind.".
    const char16_t* const expected = kSearchString + 19;

    const char16_t* const match = strstr16(kSearchString, u"wind.");

    EXPECT_EQ(expected, match);
}
139
// A target located in the middle of the haystack is expected to be found at
// its starting offset.
TEST_F(UnicodeTest, strstr16TargetWithinString) {
    // "leaf" begins at index 7 of "I am a leaf on the wind.".
    const char16_t* const expected = kSearchString + 7;

    const char16_t* const match = strstr16(kSearchString, u"leaf");

    EXPECT_EQ(expected, match);
}
144
// A target that does not occur in the haystack is expected to yield nullptr.
TEST_F(UnicodeTest, strstr16TargetNotPresent) {
    const char16_t* const match = strstr16(kSearchString, u"soar");

    EXPECT_EQ(nullptr, match);
}
149
150 // http://b/29267949
151 // Test that overreading in utf8_to_utf16_length is detected
TEST_F(UnicodeTest,InvalidUtf8OverreadDetected)152 TEST_F(UnicodeTest, InvalidUtf8OverreadDetected) {
153 // An utf8 char starting with \xc4 is two bytes long.
154 // Add extra zeros so no extra memory is read in case the code doesn't
155 // work as expected.
156 static char utf8[] = "\xc4\x00\x00\x00";
157 ASSERT_DEATH(utf8_to_utf16_length((uint8_t *) utf8, strlen(utf8),
158 true /* overreadIsFatal */), "" /* regex for ASSERT_DEATH */);
159 }
160
161 }
162