• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright (c) 2011 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #include <algorithm>
6 
7 #include "base/logging.h"
8 #include "base/string_piece.h"
9 #include "base/utf_offset_string_conversions.h"
10 #include "testing/gtest/include/gtest/gtest.h"
11 
12 namespace base {
13 
14 namespace {
15 
// Shorthand for std::wstring::npos: the value the tests below expect an
// offset to take when it has no counterpart in the converted string.
// (No `static` needed: the unnamed namespace already gives internal linkage.)
const size_t kNpos = std::wstring::npos;
17 
18 // Given a null-terminated string of wchar_t with each wchar_t representing
19 // a UTF-16 code unit, returns a string16 made up of wchar_t's in the input.
20 // Each wchar_t should be <= 0xFFFF and a non-BMP character (> U+FFFF)
21 // should be represented as a surrogate pair (two UTF-16 units)
22 // *even* where wchar_t is 32-bit (Linux and Mac).
23 //
24 // This is to help write tests for functions with string16 params until
25 // the C++ 0x UTF-16 literal is well-supported by compilers.
BuildString16(const wchar_t * s)26 string16 BuildString16(const wchar_t* s) {
27 #if defined(WCHAR_T_IS_UTF16)
28   return string16(s);
29 #elif defined(WCHAR_T_IS_UTF32)
30   string16 u16;
31   while (*s != 0) {
32     DCHECK(static_cast<unsigned int>(*s) <= 0xFFFFu);
33     u16.push_back(*s++);
34   }
35   return u16;
36 #endif
37 }
38 
39 }  // namespace
40 
// Table-driven check of the single-offset conversion helpers: each row gives
// an input string, an offset into it, and the value that offset is expected
// to hold after conversion (kNpos when it has no counterpart in the output).
TEST(UTFOffsetStringConversionsTest, AdjustOffset) {
  struct UTF8ToWideCase {
    const char* utf8;
    size_t input_offset;
    size_t output_offset;
  } utf8_to_wide_cases[] = {
    {"", 0, kNpos},
    // Two three-byte sequences.
    {"\xe4\xbd\xa0\xe5\xa5\xbd", 1, kNpos},
    {"\xe4\xbd\xa0\xe5\xa5\xbd", 3, 1},
    {"\xed\xb0\x80z", 3, 1},
    // 'A', one four-byte sequence, 'z'.
    {"A\xF0\x90\x8C\x80z", 1, 1},
    {"A\xF0\x90\x8C\x80z", 2, kNpos},
    // The expected position of 'z' depends on the width of wchar_t.
#if defined(WCHAR_T_IS_UTF16)
    {"A\xF0\x90\x8C\x80z", 5, 3},
#elif defined(WCHAR_T_IS_UTF32)
    {"A\xF0\x90\x8C\x80z", 5, 2},
#endif
  };
  const size_t utf8_case_count = ARRAYSIZE_UNSAFE(utf8_to_wide_cases);
  for (size_t row = 0; row != utf8_case_count; ++row) {
    const UTF8ToWideCase& test_case = utf8_to_wide_cases[row];
    // The helper rewrites the offset in place; start from the input value.
    size_t adjusted = test_case.input_offset;
    UTF8ToWideAndAdjustOffset(test_case.utf8, &adjusted);
    EXPECT_EQ(test_case.output_offset, adjusted);
  }

#if defined(WCHAR_T_IS_UTF32)
  // Inputs are spelled as wide literals holding UTF-16 code units and are
  // converted to string16 with BuildString16() before the call.
  struct UTF16ToWideCase {
    const wchar_t* wide;
    size_t input_offset;
    size_t output_offset;
  } utf16_to_wide_cases[] = {
    {L"\xD840\xDC00\x4E00", 0, 0},
    {L"\xD840\xDC00\x4E00", 1, kNpos},
    {L"\xD840\xDC00\x4E00", 2, 1},
  };
  const size_t utf16_case_count = ARRAYSIZE_UNSAFE(utf16_to_wide_cases);
  for (size_t row = 0; row != utf16_case_count; ++row) {
    const UTF16ToWideCase& test_case = utf16_to_wide_cases[row];
    size_t adjusted = test_case.input_offset;
    UTF16ToWideAndAdjustOffset(BuildString16(test_case.wide), &adjusted);
    EXPECT_EQ(test_case.output_offset, adjusted);
  }
#endif
}
83 
// Applies LimitOffset with a limit of kLimit to the offsets 0..kItems-1, once
// in ascending and once in descending order, and checks which offsets survive.
//
// The survivors are identified as "anything that is not kNpos" and are then
// required to lie below the limit. Counting only values that are already
// below the limit would yield the expected total even if LimitOffset changed
// nothing at all.
//
// NOTE(review): this presumes LimitOffset replaces every offset >= limit with
// npos, which is what the expected count of 10 survivors out of 0..19
// implies -- confirm against utf_offset_string_conversions.h.
TEST(UTFOffsetStringConversionsTest, LimitOffsets) {
  const size_t kLimit = 10;
  const size_t kItems = 20;
  std::vector<size_t> size_ts;
  for (size_t t = 0; t < kItems; ++t)
    size_ts.push_back(t);
  std::for_each(size_ts.begin(), size_ts.end(),
                LimitOffset<std::wstring>(kLimit));
  size_t unlimited_count = 0;
  for (std::vector<size_t>::iterator ti = size_ts.begin(); ti != size_ts.end();
       ++ti) {
    if (*ti == kNpos)
      continue;
    // Every offset that was left alone must be inside the limit.
    EXPECT_LT(*ti, kLimit);
    ++unlimited_count;
  }
  EXPECT_EQ(10U, unlimited_count);

  // Reverse the values in the vector and try again.
  size_ts.clear();
  for (size_t t = kItems; t > 0; --t)
    size_ts.push_back(t - 1);
  std::for_each(size_ts.begin(), size_ts.end(),
                LimitOffset<std::wstring>(kLimit));
  unlimited_count = 0;
  for (std::vector<size_t>::iterator ti = size_ts.begin(); ti != size_ts.end();
       ++ti) {
    if (*ti == kNpos)
      continue;
    EXPECT_LT(*ti, kLimit);
    ++unlimited_count;
  }
  EXPECT_EQ(10U, unlimited_count);
}
114 
// Runs AdjustOffset over every offset of an imaginary source string and
// compares the results with a hand-written table. Each Adjustment is built
// from three numbers which, read against the diagrams below, are the position
// of an encoded run in the source, its length in the source, and its length
// in the output.
//
// The table size is checked with ASSERT_EQ so that a mismatch stops the test
// before the element loop can index past the end of |offsets|.
TEST(UTFOffsetStringConversionsTest, AdjustOffsets) {
  // Imagine we have strings as shown in the following cases where the
  // X's represent encoded characters.
  // 1: abcXXXdef ==> abcXdef
  std::vector<size_t> offsets;
  for (size_t t = 0; t < 9; ++t)
    offsets.push_back(t);
  AdjustOffset::Adjustments adjustments;
  adjustments.push_back(AdjustOffset::Adjustment(3, 3, 1));
  std::for_each(offsets.begin(), offsets.end(), AdjustOffset(adjustments));
  size_t expected_1[] = {0, 1, 2, 3, kNpos, kNpos, 4, 5, 6};
  ASSERT_EQ(arraysize(expected_1), offsets.size());
  for (size_t i = 0; i < arraysize(expected_1); ++i)
    EXPECT_EQ(expected_1[i], offsets[i]) << "case 1, offset " << i;

  // 2: XXXaXXXXbcXXXXXXXdefXXX ==> XaXXbcXXXXdefX
  offsets.clear();
  for (size_t t = 0; t < 23; ++t)
    offsets.push_back(t);
  adjustments.clear();
  adjustments.push_back(AdjustOffset::Adjustment(0, 3, 1));
  adjustments.push_back(AdjustOffset::Adjustment(4, 4, 2));
  adjustments.push_back(AdjustOffset::Adjustment(10, 7, 4));
  adjustments.push_back(AdjustOffset::Adjustment(20, 3, 1));
  std::for_each(offsets.begin(), offsets.end(), AdjustOffset(adjustments));
  size_t expected_2[] = {0, kNpos, kNpos, 1, 2, kNpos, kNpos, kNpos, 4, 5, 6,
                         kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, 10, 11, 12,
                         13, kNpos, kNpos};
  ASSERT_EQ(arraysize(expected_2), offsets.size());
  for (size_t i = 0; i < arraysize(expected_2); ++i)
    EXPECT_EQ(expected_2[i], offsets[i]) << "case 2, offset " << i;

  // 3: XXXaXXXXbcdXXXeXX ==> aXXXXbcdXXXe
  offsets.clear();
  for (size_t t = 0; t < 17; ++t)
    offsets.push_back(t);
  adjustments.clear();
  adjustments.push_back(AdjustOffset::Adjustment(0, 3, 0));
  adjustments.push_back(AdjustOffset::Adjustment(4, 4, 4));
  adjustments.push_back(AdjustOffset::Adjustment(11, 3, 3));
  adjustments.push_back(AdjustOffset::Adjustment(15, 2, 0));
  std::for_each(offsets.begin(), offsets.end(), AdjustOffset(adjustments));
  size_t expected_3[] = {kNpos, kNpos, kNpos, 0, 1, kNpos, kNpos, kNpos, 5, 6,
                         7, 8, kNpos, kNpos, 11, kNpos, kNpos};
  ASSERT_EQ(arraysize(expected_3), offsets.size());
  for (size_t i = 0; i < arraysize(expected_3); ++i)
    EXPECT_EQ(expected_3[i], offsets[i]) << "case 3, offset " << i;
}
163 
164 }  // namespace base
165