1 //===- llvm/unittest/Support/ConvertUTFTest.cpp - ConvertUTF tests --------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9
10 #include "llvm/Support/ConvertUTF.h"
11 #include "llvm/ADT/ArrayRef.h"
12 #include "llvm/Support/Format.h"
13 #include "gtest/gtest.h"
14 #include <string>
15 #include <utility>
16 #include <vector>
17
18 using namespace llvm;
19
// Feeds BOM-prefixed little-endian UTF-16 bytes to convertUTF16ToUTF8String
// and checks the UTF-8 bytes that come out.
TEST(ConvertUTFTest, ConvertUTF16LittleEndianToUTF8String) {
  // The look of disapproval: byte-order mark FF FE, then the code units
  // 0x0CA0, '_', 0x0CA0 stored low byte first.
  static const char Utf16LE[] = "\xff\xfe\xa0\x0c_\x00\xa0\x0c";
  const std::string Expected("\xe0\xb2\xa0_\xe0\xb2\xa0");

  // sizeof - 1 leaves out the NUL the compiler appends to the literal.
  const ArrayRef<char> Input(Utf16LE, sizeof(Utf16LE) - 1);

  std::string Result;
  const bool Success = convertUTF16ToUTF8String(Input, Result);
  EXPECT_TRUE(Success);
  EXPECT_EQ(Expected, Result);
}
30
// Feeds BOM-prefixed big-endian UTF-16 bytes to convertUTF16ToUTF8String and
// checks the UTF-8 bytes that come out.
TEST(ConvertUTFTest, ConvertUTF16BigEndianToUTF8String) {
  // The look of disapproval: byte-order mark FE FF, then the code units
  // 0x0CA0, '_', 0x0CA0 stored high byte first.
  static const char Utf16BE[] = "\xfe\xff\x0c\xa0\x00_\x0c\xa0";
  const std::string Expected("\xe0\xb2\xa0_\xe0\xb2\xa0");

  // sizeof - 1 leaves out the NUL the compiler appends to the literal.
  const ArrayRef<char> Input(Utf16BE, sizeof(Utf16BE) - 1);

  std::string Result;
  const bool Success = convertUTF16ToUTF8String(Input, Result);
  EXPECT_TRUE(Success);
  EXPECT_EQ(Expected, Result);
}
41
// Converts a UTF-8 string to UTF-16 code units and checks each unit.
TEST(ConvertUTFTest, ConvertUTF8ToUTF16String) {
  // The look of disapproval, UTF-8 encoded.
  static const char Utf8[] = "\xe0\xb2\xa0_\xe0\xb2\xa0";
  // Only the first three entries are compared below; the trailing 0 is never
  // read by this test.
  static const UTF16 Expected[] = {0x0CA0, 0x005f, 0x0CA0, 0};

  SmallVector<UTF16, 5> Result;
  // sizeof - 1 leaves out the literal's implicit NUL terminator.
  const bool Success =
      convertUTF8ToUTF16String(StringRef(Utf8, sizeof(Utf8) - 1), Result);
  EXPECT_TRUE(Success);

  // Abort the test on a size mismatch so the loop never indexes out of range.
  ASSERT_EQ(3u, Result.size());
  for (unsigned I = 0; I != 3u; ++I)
    EXPECT_EQ(Expected[I], Result[I]);
}
54
// A byte buffer of odd length must be rejected by convertUTF16ToUTF8String.
TEST(ConvertUTFTest, OddLengthInput) {
  // Five bytes: not a whole number of two-byte UTF-16 code units.
  const ArrayRef<char> FiveBytes = makeArrayRef("xxxxx", 5);

  std::string Result;
  const bool Success = convertUTF16ToUTF8String(FiveBytes, Result);
  EXPECT_FALSE(Success);
}
60
// Converting an empty byte range must succeed and leave the output empty.
TEST(ConvertUTFTest, Empty) {
  const llvm::ArrayRef<char> NoBytes(None);

  std::string Result;
  const bool Success = convertUTF16ToUTF8String(NoBytes, Result);
  EXPECT_TRUE(Success);
  EXPECT_TRUE(Result.empty());
}
67
// Exercises hasUTF16ByteOrderMark on inputs that start with either byte order
// mark, and on inputs that are too short to contain one.
TEST(ConvertUTFTest, HasUTF16BOM) {
  // Little-endian mark.
  bool HasBOM = hasUTF16ByteOrderMark(makeArrayRef("\xff\xfe", 2));
  EXPECT_TRUE(HasBOM);
  // Big-endian mark.
  HasBOM = hasUTF16ByteOrderMark(makeArrayRef("\xfe\xff", 2));
  EXPECT_TRUE(HasBOM);
  HasBOM = hasUTF16ByteOrderMark(makeArrayRef("\xfe\xff ", 3));
  EXPECT_TRUE(HasBOM); // Don't care about odd lengths.

  // Mark followed by a NUL byte and "asdf" (7 bytes in total). The literal is
  // deliberately split after "\x00": a hex escape consumes every hex digit
  // that follows it, so an unsplit "\x00asdf" would begin with the single byte
  // 0x0a instead of a NUL followed by 'a'.
  HasBOM = hasUTF16ByteOrderMark(makeArrayRef("\xfe\xff\x00" "asdf", 7));
  EXPECT_TRUE(HasBOM);

  // Inputs shorter than two bytes cannot hold a mark.
  HasBOM = hasUTF16ByteOrderMark(None);
  EXPECT_FALSE(HasBOM);
  HasBOM = hasUTF16ByteOrderMark(makeArrayRef("\xfe", 1));
  EXPECT_FALSE(HasBOM);
}
83
// Checks the ArrayRef<UTF16> overload of convertUTF16ToUTF8String using the
// same little-endian, BOM-prefixed bytes as the ArrayRef<char> test.
TEST(ConvertUTFTest, UTF16WrappersForConvertUTF16ToUTF8String) {
  // Src is the look of disapproval.
  // alignas is required because the buffer is viewed through a UTF16 pointer
  // below; a plain char array carries no guarantee of UTF16 alignment.
  alignas(UTF16) static const char Src[] = "\xff\xfe\xa0\x0c_\x00\xa0\x0c";

  // Number of whole UTF16 code units in Src, excluding the literal's
  // implicit NUL terminator.
  const size_t NumCodeUnits = (sizeof(Src) - 1) / sizeof(UTF16);
  ArrayRef<UTF16> SrcRef =
      makeArrayRef(reinterpret_cast<const UTF16 *>(Src), NumCodeUnits);

  std::string Result;
  bool Success = convertUTF16ToUTF8String(SrcRef, Result);
  EXPECT_TRUE(Success);
  std::string Expected("\xe0\xb2\xa0_\xe0\xb2\xa0");
  EXPECT_EQ(Expected, Result);
}
94
// Converts the same UTF-8 text to a wide string twice: once passed as a
// const char pointer and once as a StringRef with an explicit length.
TEST(ConvertUTFTest, ConvertUTF8toWide) {
  // Src is the look of disapproval.
  static const char Src[] = "\xe0\xb2\xa0_\xe0\xb2\xa0";
  const std::wstring Expected(L"\x0ca0_\x0ca0");
  std::wstring Result;

  // First pass: hand the source over as a plain const char pointer.
  bool Success = ConvertUTF8toWide(static_cast<const char *>(Src), Result);
  EXPECT_TRUE(Success);
  EXPECT_EQ(Expected, Result);

  // Second pass: start from an empty result and pass the seven source bytes
  // as a StringRef.
  Result.clear();
  Success = ConvertUTF8toWide(StringRef(Src, 7), Result);
  EXPECT_TRUE(Success);
  EXPECT_EQ(Expected, Result);
}
108
// Converts a wide-character string to UTF-8 and checks the resulting bytes.
TEST(ConvertUTFTest, convertWideToUTF8) {
  // Src is the look of disapproval.
  static const wchar_t Src[] = L"\x0ca0_\x0ca0";
  const std::string Expected("\xe0\xb2\xa0_\xe0\xb2\xa0");

  std::string Result;
  const bool Success = convertWideToUTF8(Src, Result);
  EXPECT_TRUE(Success);
  EXPECT_EQ(Expected, Result);
}
118
119 struct ConvertUTFResultContainer {
120 ConversionResult ErrorCode;
121 std::vector<unsigned> UnicodeScalars;
122
ConvertUTFResultContainerConvertUTFResultContainer123 ConvertUTFResultContainer(ConversionResult ErrorCode)
124 : ErrorCode(ErrorCode) {}
125
126 ConvertUTFResultContainer
withScalarsConvertUTFResultContainer127 withScalars(unsigned US0 = 0x110000, unsigned US1 = 0x110000,
128 unsigned US2 = 0x110000, unsigned US3 = 0x110000,
129 unsigned US4 = 0x110000, unsigned US5 = 0x110000,
130 unsigned US6 = 0x110000, unsigned US7 = 0x110000) {
131 ConvertUTFResultContainer Result(*this);
132 if (US0 != 0x110000)
133 Result.UnicodeScalars.push_back(US0);
134 if (US1 != 0x110000)
135 Result.UnicodeScalars.push_back(US1);
136 if (US2 != 0x110000)
137 Result.UnicodeScalars.push_back(US2);
138 if (US3 != 0x110000)
139 Result.UnicodeScalars.push_back(US3);
140 if (US4 != 0x110000)
141 Result.UnicodeScalars.push_back(US4);
142 if (US5 != 0x110000)
143 Result.UnicodeScalars.push_back(US5);
144 if (US6 != 0x110000)
145 Result.UnicodeScalars.push_back(US6);
146 if (US7 != 0x110000)
147 Result.UnicodeScalars.push_back(US7);
148 return Result;
149 }
150 };
151
152 std::pair<ConversionResult, std::vector<unsigned>>
ConvertUTF8ToUnicodeScalarsLenient(StringRef S)153 ConvertUTF8ToUnicodeScalarsLenient(StringRef S) {
154 const UTF8 *SourceStart = reinterpret_cast<const UTF8 *>(S.data());
155
156 const UTF8 *SourceNext = SourceStart;
157 std::vector<UTF32> Decoded(S.size(), 0);
158 UTF32 *TargetStart = Decoded.data();
159
160 auto ErrorCode =
161 ConvertUTF8toUTF32(&SourceNext, SourceStart + S.size(), &TargetStart,
162 Decoded.data() + Decoded.size(), lenientConversion);
163
164 Decoded.resize(TargetStart - Decoded.data());
165
166 return std::make_pair(ErrorCode, Decoded);
167 }
168
169 std::pair<ConversionResult, std::vector<unsigned>>
ConvertUTF8ToUnicodeScalarsPartialLenient(StringRef S)170 ConvertUTF8ToUnicodeScalarsPartialLenient(StringRef S) {
171 const UTF8 *SourceStart = reinterpret_cast<const UTF8 *>(S.data());
172
173 const UTF8 *SourceNext = SourceStart;
174 std::vector<UTF32> Decoded(S.size(), 0);
175 UTF32 *TargetStart = Decoded.data();
176
177 auto ErrorCode = ConvertUTF8toUTF32Partial(
178 &SourceNext, SourceStart + S.size(), &TargetStart,
179 Decoded.data() + Decoded.size(), lenientConversion);
180
181 Decoded.resize(TargetStart - Decoded.data());
182
183 return std::make_pair(ErrorCode, Decoded);
184 }
185
186 ::testing::AssertionResult
CheckConvertUTF8ToUnicodeScalars(ConvertUTFResultContainer Expected,StringRef S,bool Partial=false)187 CheckConvertUTF8ToUnicodeScalars(ConvertUTFResultContainer Expected,
188 StringRef S, bool Partial = false) {
189 ConversionResult ErrorCode;
190 std::vector<unsigned> Decoded;
191 if (!Partial)
192 std::tie(ErrorCode, Decoded) = ConvertUTF8ToUnicodeScalarsLenient(S);
193 else
194 std::tie(ErrorCode, Decoded) = ConvertUTF8ToUnicodeScalarsPartialLenient(S);
195
196 if (Expected.ErrorCode != ErrorCode)
197 return ::testing::AssertionFailure() << "Expected error code "
198 << Expected.ErrorCode << ", actual "
199 << ErrorCode;
200
201 if (Expected.UnicodeScalars != Decoded)
202 return ::testing::AssertionFailure()
203 << "Expected lenient decoded result:\n"
204 << ::testing::PrintToString(Expected.UnicodeScalars) << "\n"
205 << "Actual result:\n" << ::testing::PrintToString(Decoded);
206
207 return ::testing::AssertionSuccess();
208 }
209
TEST(ConvertUTFTest,UTF8ToUTF32Lenient)210 TEST(ConvertUTFTest, UTF8ToUTF32Lenient) {
211
212 //
213 // 1-byte sequences
214 //
215
216 // U+0041 LATIN CAPITAL LETTER A
217 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
218 ConvertUTFResultContainer(conversionOK).withScalars(0x0041), "\x41"));
219
220 //
221 // 2-byte sequences
222 //
223
224 // U+0283 LATIN SMALL LETTER ESH
225 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
226 ConvertUTFResultContainer(conversionOK).withScalars(0x0283),
227 "\xca\x83"));
228
229 // U+03BA GREEK SMALL LETTER KAPPA
230 // U+1F79 GREEK SMALL LETTER OMICRON WITH OXIA
231 // U+03C3 GREEK SMALL LETTER SIGMA
232 // U+03BC GREEK SMALL LETTER MU
233 // U+03B5 GREEK SMALL LETTER EPSILON
234 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
235 ConvertUTFResultContainer(conversionOK)
236 .withScalars(0x03ba, 0x1f79, 0x03c3, 0x03bc, 0x03b5),
237 "\xce\xba\xe1\xbd\xb9\xcf\x83\xce\xbc\xce\xb5"));
238
239 //
240 // 3-byte sequences
241 //
242
243 // U+4F8B CJK UNIFIED IDEOGRAPH-4F8B
244 // U+6587 CJK UNIFIED IDEOGRAPH-6587
245 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
246 ConvertUTFResultContainer(conversionOK).withScalars(0x4f8b, 0x6587),
247 "\xe4\xbe\x8b\xe6\x96\x87"));
248
249 // U+D55C HANGUL SYLLABLE HAN
250 // U+AE00 HANGUL SYLLABLE GEUL
251 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
252 ConvertUTFResultContainer(conversionOK).withScalars(0xd55c, 0xae00),
253 "\xed\x95\x9c\xea\xb8\x80"));
254
255 // U+1112 HANGUL CHOSEONG HIEUH
256 // U+1161 HANGUL JUNGSEONG A
257 // U+11AB HANGUL JONGSEONG NIEUN
258 // U+1100 HANGUL CHOSEONG KIYEOK
259 // U+1173 HANGUL JUNGSEONG EU
260 // U+11AF HANGUL JONGSEONG RIEUL
261 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
262 ConvertUTFResultContainer(conversionOK)
263 .withScalars(0x1112, 0x1161, 0x11ab, 0x1100, 0x1173, 0x11af),
264 "\xe1\x84\x92\xe1\x85\xa1\xe1\x86\xab\xe1\x84\x80\xe1\x85\xb3"
265 "\xe1\x86\xaf"));
266
267 //
268 // 4-byte sequences
269 //
270
271 // U+E0100 VARIATION SELECTOR-17
272 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
273 ConvertUTFResultContainer(conversionOK).withScalars(0x000E0100),
274 "\xf3\xa0\x84\x80"));
275
276 //
277 // First possible sequence of a certain length
278 //
279
280 // U+0000 NULL
281 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
282 ConvertUTFResultContainer(conversionOK).withScalars(0x0000),
283 StringRef("\x00", 1)));
284
285 // U+0080 PADDING CHARACTER
286 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
287 ConvertUTFResultContainer(conversionOK).withScalars(0x0080),
288 "\xc2\x80"));
289
290 // U+0800 SAMARITAN LETTER ALAF
291 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
292 ConvertUTFResultContainer(conversionOK).withScalars(0x0800),
293 "\xe0\xa0\x80"));
294
295 // U+10000 LINEAR B SYLLABLE B008 A
296 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
297 ConvertUTFResultContainer(conversionOK).withScalars(0x10000),
298 "\xf0\x90\x80\x80"));
299
300 // U+200000 (invalid)
301 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
302 ConvertUTFResultContainer(sourceIllegal)
303 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
304 "\xf8\x88\x80\x80\x80"));
305
306 // U+4000000 (invalid)
307 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
308 ConvertUTFResultContainer(sourceIllegal)
309 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
310 "\xfc\x84\x80\x80\x80\x80"));
311
312 //
313 // Last possible sequence of a certain length
314 //
315
316 // U+007F DELETE
317 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
318 ConvertUTFResultContainer(conversionOK).withScalars(0x007f), "\x7f"));
319
320 // U+07FF (unassigned)
321 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
322 ConvertUTFResultContainer(conversionOK).withScalars(0x07ff),
323 "\xdf\xbf"));
324
325 // U+FFFF (noncharacter)
326 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
327 ConvertUTFResultContainer(conversionOK).withScalars(0xffff),
328 "\xef\xbf\xbf"));
329
330 // U+1FFFFF (invalid)
331 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
332 ConvertUTFResultContainer(sourceIllegal)
333 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd),
334 "\xf7\xbf\xbf\xbf"));
335
336 // U+3FFFFFF (invalid)
337 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
338 ConvertUTFResultContainer(sourceIllegal)
339 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
340 "\xfb\xbf\xbf\xbf\xbf"));
341
342 // U+7FFFFFFF (invalid)
343 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
344 ConvertUTFResultContainer(sourceIllegal)
345 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
346 "\xfd\xbf\xbf\xbf\xbf\xbf"));
347
348 //
349 // Other boundary conditions
350 //
351
352 // U+D7FF (unassigned)
353 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
354 ConvertUTFResultContainer(conversionOK).withScalars(0xd7ff),
355 "\xed\x9f\xbf"));
356
357 // U+E000 (private use)
358 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
359 ConvertUTFResultContainer(conversionOK).withScalars(0xe000),
360 "\xee\x80\x80"));
361
362 // U+FFFD REPLACEMENT CHARACTER
363 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
364 ConvertUTFResultContainer(conversionOK).withScalars(0xfffd),
365 "\xef\xbf\xbd"));
366
367 // U+10FFFF (noncharacter)
368 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
369 ConvertUTFResultContainer(conversionOK).withScalars(0x10ffff),
370 "\xf4\x8f\xbf\xbf"));
371
372 // U+110000 (invalid)
373 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
374 ConvertUTFResultContainer(sourceIllegal)
375 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd),
376 "\xf4\x90\x80\x80"));
377
378 //
379 // Unexpected continuation bytes
380 //
381
382 // A sequence of unexpected continuation bytes that don't follow a first
383 // byte, every byte is a maximal subpart.
384
385 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
386 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\x80"));
387 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
388 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xbf"));
389 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
390 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
391 "\x80\x80"));
392 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
393 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
394 "\x80\xbf"));
395 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
396 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
397 "\xbf\x80"));
398 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
399 ConvertUTFResultContainer(sourceIllegal)
400 .withScalars(0xfffd, 0xfffd, 0xfffd),
401 "\x80\xbf\x80"));
402 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
403 ConvertUTFResultContainer(sourceIllegal)
404 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd),
405 "\x80\xbf\x80\xbf"));
406 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
407 ConvertUTFResultContainer(sourceIllegal)
408 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
409 "\x80\xbf\x82\xbf\xaa"));
410 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
411 ConvertUTFResultContainer(sourceIllegal)
412 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
413 "\xaa\xb0\xbb\xbf\xaa\xa0"));
414 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
415 ConvertUTFResultContainer(sourceIllegal)
416 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
417 "\xaa\xb0\xbb\xbf\xaa\xa0\x8f"));
418
419 // All continuation bytes (0x80--0xbf).
420 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
421 ConvertUTFResultContainer(sourceIllegal)
422 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd,
423 0xfffd, 0xfffd, 0xfffd, 0xfffd)
424 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd,
425 0xfffd, 0xfffd, 0xfffd, 0xfffd)
426 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd,
427 0xfffd, 0xfffd, 0xfffd, 0xfffd)
428 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd,
429 0xfffd, 0xfffd, 0xfffd, 0xfffd)
430 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd,
431 0xfffd, 0xfffd, 0xfffd, 0xfffd)
432 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd,
433 0xfffd, 0xfffd, 0xfffd, 0xfffd)
434 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd,
435 0xfffd, 0xfffd, 0xfffd, 0xfffd)
436 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd,
437 0xfffd, 0xfffd, 0xfffd, 0xfffd),
438 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
439 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
440 "\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf"
441 "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf"));
442
443 //
444 // Lonely start bytes
445 //
446
447 // Start bytes of 2-byte sequences (0xc0--0xdf).
448 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
449 ConvertUTFResultContainer(sourceIllegal)
450 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd,
451 0xfffd, 0xfffd, 0xfffd, 0xfffd)
452 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd,
453 0xfffd, 0xfffd, 0xfffd, 0xfffd)
454 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd,
455 0xfffd, 0xfffd, 0xfffd, 0xfffd)
456 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd,
457 0xfffd, 0xfffd, 0xfffd, 0xfffd),
458 "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
459 "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf"));
460
461 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
462 ConvertUTFResultContainer(sourceIllegal)
463 .withScalars(0xfffd, 0x0020, 0xfffd, 0x0020,
464 0xfffd, 0x0020, 0xfffd, 0x0020)
465 .withScalars(0xfffd, 0x0020, 0xfffd, 0x0020,
466 0xfffd, 0x0020, 0xfffd, 0x0020)
467 .withScalars(0xfffd, 0x0020, 0xfffd, 0x0020,
468 0xfffd, 0x0020, 0xfffd, 0x0020)
469 .withScalars(0xfffd, 0x0020, 0xfffd, 0x0020,
470 0xfffd, 0x0020, 0xfffd, 0x0020)
471 .withScalars(0xfffd, 0x0020, 0xfffd, 0x0020,
472 0xfffd, 0x0020, 0xfffd, 0x0020)
473 .withScalars(0xfffd, 0x0020, 0xfffd, 0x0020,
474 0xfffd, 0x0020, 0xfffd, 0x0020)
475 .withScalars(0xfffd, 0x0020, 0xfffd, 0x0020,
476 0xfffd, 0x0020, 0xfffd, 0x0020)
477 .withScalars(0xfffd, 0x0020, 0xfffd, 0x0020,
478 0xfffd, 0x0020, 0xfffd, 0x0020),
479 "\xc0\x20\xc1\x20\xc2\x20\xc3\x20\xc4\x20\xc5\x20\xc6\x20\xc7\x20"
480 "\xc8\x20\xc9\x20\xca\x20\xcb\x20\xcc\x20\xcd\x20\xce\x20\xcf\x20"
481 "\xd0\x20\xd1\x20\xd2\x20\xd3\x20\xd4\x20\xd5\x20\xd6\x20\xd7\x20"
482 "\xd8\x20\xd9\x20\xda\x20\xdb\x20\xdc\x20\xdd\x20\xde\x20\xdf\x20"));
483
484 // Start bytes of 3-byte sequences (0xe0--0xef).
485 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
486 ConvertUTFResultContainer(sourceIllegal)
487 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd,
488 0xfffd, 0xfffd, 0xfffd, 0xfffd)
489 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd,
490 0xfffd, 0xfffd, 0xfffd, 0xfffd),
491 "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"));
492
493 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
494 ConvertUTFResultContainer(sourceIllegal)
495 .withScalars(0xfffd, 0x0020, 0xfffd, 0x0020,
496 0xfffd, 0x0020, 0xfffd, 0x0020)
497 .withScalars(0xfffd, 0x0020, 0xfffd, 0x0020,
498 0xfffd, 0x0020, 0xfffd, 0x0020)
499 .withScalars(0xfffd, 0x0020, 0xfffd, 0x0020,
500 0xfffd, 0x0020, 0xfffd, 0x0020)
501 .withScalars(0xfffd, 0x0020, 0xfffd, 0x0020,
502 0xfffd, 0x0020, 0xfffd, 0x0020),
503 "\xe0\x20\xe1\x20\xe2\x20\xe3\x20\xe4\x20\xe5\x20\xe6\x20\xe7\x20"
504 "\xe8\x20\xe9\x20\xea\x20\xeb\x20\xec\x20\xed\x20\xee\x20\xef\x20"));
505
506 // Start bytes of 4-byte sequences (0xf0--0xf7).
507 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
508 ConvertUTFResultContainer(sourceIllegal)
509 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd,
510 0xfffd, 0xfffd, 0xfffd, 0xfffd),
511 "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7"));
512
513 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
514 ConvertUTFResultContainer(sourceIllegal)
515 .withScalars(0xfffd, 0x0020, 0xfffd, 0x0020,
516 0xfffd, 0x0020, 0xfffd, 0x0020)
517 .withScalars(0xfffd, 0x0020, 0xfffd, 0x0020,
518 0xfffd, 0x0020, 0xfffd, 0x0020),
519 "\xf0\x20\xf1\x20\xf2\x20\xf3\x20\xf4\x20\xf5\x20\xf6\x20\xf7\x20"));
520
521 // Start bytes of 5-byte sequences (0xf8--0xfb).
522 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
523 ConvertUTFResultContainer(sourceIllegal)
524 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd),
525 "\xf8\xf9\xfa\xfb"));
526
527 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
528 ConvertUTFResultContainer(sourceIllegal)
529 .withScalars(0xfffd, 0x0020, 0xfffd, 0x0020,
530 0xfffd, 0x0020, 0xfffd, 0x0020),
531 "\xf8\x20\xf9\x20\xfa\x20\xfb\x20"));
532
533 // Start bytes of 6-byte sequences (0xfc--0xfd).
534 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
535 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
536 "\xfc\xfd"));
537
538 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
539 ConvertUTFResultContainer(sourceIllegal)
540 .withScalars(0xfffd, 0x0020, 0xfffd, 0x0020),
541 "\xfc\x20\xfd\x20"));
542
543 //
544 // Other bytes (0xc0--0xc1, 0xfe--0xff).
545 //
546
547 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
548 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xc0"));
549 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
550 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xc1"));
551 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
552 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xfe"));
553 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
554 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xff"));
555
556 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
557 ConvertUTFResultContainer(sourceIllegal)
558 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd),
559 "\xc0\xc1\xfe\xff"));
560
561 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
562 ConvertUTFResultContainer(sourceIllegal)
563 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd),
564 "\xfe\xfe\xff\xff"));
565
566 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
567 ConvertUTFResultContainer(sourceIllegal)
568 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
569 "\xfe\x80\x80\x80\x80\x80"));
570
571 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
572 ConvertUTFResultContainer(sourceIllegal)
573 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
574 "\xff\x80\x80\x80\x80\x80"));
575
576 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
577 ConvertUTFResultContainer(sourceIllegal)
578 .withScalars(0xfffd, 0x0020, 0xfffd, 0x0020,
579 0xfffd, 0x0020, 0xfffd, 0x0020),
580 "\xc0\x20\xc1\x20\xfe\x20\xff\x20"));
581
582 //
583 // Sequences with one continuation byte missing
584 //
585
586 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
587 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xc2"));
588 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
589 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xdf"));
590 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
591 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd),
592 "\xe0\xa0"));
593 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
594 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd),
595 "\xe0\xbf"));
596 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
597 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd),
598 "\xe1\x80"));
599 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
600 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd),
601 "\xec\xbf"));
602 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
603 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd),
604 "\xed\x80"));
605 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
606 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd),
607 "\xed\x9f"));
608 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
609 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd),
610 "\xee\x80"));
611 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
612 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd),
613 "\xef\xbf"));
614 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
615 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd),
616 "\xf0\x90\x80"));
617 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
618 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd),
619 "\xf0\xbf\xbf"));
620 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
621 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd),
622 "\xf1\x80\x80"));
623 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
624 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd),
625 "\xf3\xbf\xbf"));
626 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
627 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd),
628 "\xf4\x80\x80"));
629 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
630 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd),
631 "\xf4\x8f\xbf"));
632
633 // Overlong sequences with one trailing byte missing.
634 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
635 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd),
636 "\xc0"));
637 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
638 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd),
639 "\xc1"));
640 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
641 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
642 "\xe0\x80"));
643 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
644 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
645 "\xe0\x9f"));
646 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
647 ConvertUTFResultContainer(sourceIllegal)
648 .withScalars(0xfffd, 0xfffd, 0xfffd),
649 "\xf0\x80\x80"));
650 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
651 ConvertUTFResultContainer(sourceIllegal)
652 .withScalars(0xfffd, 0xfffd, 0xfffd),
653 "\xf0\x8f\x80"));
654 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
655 ConvertUTFResultContainer(sourceIllegal)
656 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd),
657 "\xf8\x80\x80\x80"));
658 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
659 ConvertUTFResultContainer(sourceIllegal)
660 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
661 "\xfc\x80\x80\x80\x80"));
662
663 // Sequences that represent surrogates with one trailing byte missing.
664 // High surrogates
665 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
666 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
667 "\xed\xa0"));
668 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
669 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
670 "\xed\xac"));
671 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
672 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
673 "\xed\xaf"));
674 // Low surrogates
675 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
676 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
677 "\xed\xb0"));
678 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
679 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
680 "\xed\xb4"));
681 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
682 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
683 "\xed\xbf"));
684
685 // Ill-formed 4-byte sequences.
686 // 11110zzz 10zzyyyy 10yyyyxx 10xxxxxx
687 // U+1100xx (invalid)
688 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
689 ConvertUTFResultContainer(sourceIllegal)
690 .withScalars(0xfffd, 0xfffd, 0xfffd),
691 "\xf4\x90\x80"));
692 // U+13FBxx (invalid)
693 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
694 ConvertUTFResultContainer(sourceIllegal)
695 .withScalars(0xfffd, 0xfffd, 0xfffd),
696 "\xf4\xbf\xbf"));
697 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
698 ConvertUTFResultContainer(sourceIllegal)
699 .withScalars(0xfffd, 0xfffd, 0xfffd),
700 "\xf5\x80\x80"));
701 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
702 ConvertUTFResultContainer(sourceIllegal)
703 .withScalars(0xfffd, 0xfffd, 0xfffd),
704 "\xf6\x80\x80"));
705 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
706 ConvertUTFResultContainer(sourceIllegal)
707 .withScalars(0xfffd, 0xfffd, 0xfffd),
708 "\xf7\x80\x80"));
709 // U+1FFBxx (invalid)
710 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
711 ConvertUTFResultContainer(sourceIllegal)
712 .withScalars(0xfffd, 0xfffd, 0xfffd),
713 "\xf7\xbf\xbf"));
714
715 // Ill-formed 5-byte sequences.
716 // 111110uu 10zzzzzz 10zzyyyy 10yyyyxx 10xxxxxx
717 // U+2000xx (invalid)
718 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
719 ConvertUTFResultContainer(sourceIllegal)
720 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd),
721 "\xf8\x88\x80\x80"));
722 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
723 ConvertUTFResultContainer(sourceIllegal)
724 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd),
725 "\xf8\xbf\xbf\xbf"));
726 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
727 ConvertUTFResultContainer(sourceIllegal)
728 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd),
729 "\xf9\x80\x80\x80"));
730 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
731 ConvertUTFResultContainer(sourceIllegal)
732 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd),
733 "\xfa\x80\x80\x80"));
734 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
735 ConvertUTFResultContainer(sourceIllegal)
736 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd),
737 "\xfb\x80\x80\x80"));
738 // U+3FFFFxx (invalid)
739 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
740 ConvertUTFResultContainer(sourceIllegal)
741 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd),
742 "\xfb\xbf\xbf\xbf"));
743
744 // Ill-formed 6-byte sequences.
745 // 1111110u 10uuuuuu 10uzzzzz 10zzzyyyy 10yyyyxx 10xxxxxx
746 // U+40000xx (invalid)
747 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
748 ConvertUTFResultContainer(sourceIllegal)
749 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
750 "\xfc\x84\x80\x80\x80"));
751 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
752 ConvertUTFResultContainer(sourceIllegal)
753 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
754 "\xfc\xbf\xbf\xbf\xbf"));
755 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
756 ConvertUTFResultContainer(sourceIllegal)
757 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
758 "\xfd\x80\x80\x80\x80"));
759 // U+7FFFFFxx (invalid)
760 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
761 ConvertUTFResultContainer(sourceIllegal)
762 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
763 "\xfd\xbf\xbf\xbf\xbf"));
764
765 //
766 // Sequences with two continuation bytes missing
767 //
768
769 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
770 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd),
771 "\xf0\x90"));
772 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
773 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd),
774 "\xf0\xbf"));
775 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
776 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd),
777 "\xf1\x80"));
778 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
779 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd),
780 "\xf3\xbf"));
781 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
782 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd),
783 "\xf4\x80"));
784 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
785 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd),
786 "\xf4\x8f"));
787
788 // Overlong sequences with two trailing byte missing.
789 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
790 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xe0"));
791 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
792 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
793 "\xf0\x80"));
794 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
795 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
796 "\xf0\x8f"));
797 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
798 ConvertUTFResultContainer(sourceIllegal)
799 .withScalars(0xfffd, 0xfffd, 0xfffd),
800 "\xf8\x80\x80"));
801 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
802 ConvertUTFResultContainer(sourceIllegal)
803 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd),
804 "\xfc\x80\x80\x80"));
805
806 // Sequences that represent surrogates with two trailing bytes missing.
807 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
808 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xed"));
809
810 // Ill-formed 4-byte sequences.
811 // 11110zzz 10zzyyyy 10yyyyxx 10xxxxxx
812 // U+110yxx (invalid)
813 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
814 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
815 "\xf4\x90"));
816 // U+13Fyxx (invalid)
817 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
818 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
819 "\xf4\xbf"));
820 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
821 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
822 "\xf5\x80"));
823 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
824 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
825 "\xf6\x80"));
826 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
827 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
828 "\xf7\x80"));
829 // U+1FFyxx (invalid)
830 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
831 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
832 "\xf7\xbf"));
833
834 // Ill-formed 5-byte sequences.
835 // 111110uu 10zzzzzz 10zzyyyy 10yyyyxx 10xxxxxx
836 // U+200yxx (invalid)
837 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
838 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd, 0xfffd),
839 "\xf8\x88\x80"));
840 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
841 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd, 0xfffd),
842 "\xf8\xbf\xbf"));
843 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
844 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd, 0xfffd),
845 "\xf9\x80\x80"));
846 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
847 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd, 0xfffd),
848 "\xfa\x80\x80"));
849 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
850 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd, 0xfffd),
851 "\xfb\x80\x80"));
852 // U+3FFFyxx (invalid)
853 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
854 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd, 0xfffd),
855 "\xfb\xbf\xbf"));
856
857 // Ill-formed 6-byte sequences.
858 // 1111110u 10uuuuuu 10zzzzzz 10zzyyyy 10yyyyxx 10xxxxxx
859 // U+4000yxx (invalid)
860 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
861 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd),
862 "\xfc\x84\x80\x80"));
863 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
864 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd),
865 "\xfc\xbf\xbf\xbf"));
866 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
867 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd),
868 "\xfd\x80\x80\x80"));
869 // U+7FFFFyxx (invalid)
870 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
871 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd),
872 "\xfd\xbf\xbf\xbf"));
873
874 //
875 // Sequences with three continuation bytes missing
876 //
877
878 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
879 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xf0"));
880 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
881 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xf1"));
882 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
883 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xf2"));
884 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
885 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xf3"));
886 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
887 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xf4"));
888
889 // Broken overlong sequences.
890 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
891 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xf0"));
892 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
893 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
894 "\xf8\x80"));
895 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
896 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd, 0xfffd),
897 "\xfc\x80\x80"));
898
899 // Ill-formed 4-byte sequences.
900 // 11110zzz 10zzyyyy 10yyyyxx 10xxxxxx
901 // U+14yyxx (invalid)
902 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
903 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xf5"));
904 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
905 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xf6"));
906 // U+1Cyyxx (invalid)
907 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
908 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xf7"));
909
910 // Ill-formed 5-byte sequences.
911 // 111110uu 10zzzzzz 10zzyyyy 10yyyyxx 10xxxxxx
912 // U+20yyxx (invalid)
913 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
914 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
915 "\xf8\x88"));
916 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
917 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
918 "\xf8\xbf"));
919 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
920 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
921 "\xf9\x80"));
922 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
923 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
924 "\xfa\x80"));
925 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
926 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
927 "\xfb\x80"));
928 // U+3FCyyxx (invalid)
929 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
930 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
931 "\xfb\xbf"));
932
933 // Ill-formed 6-byte sequences.
934 // 1111110u 10uuuuuu 10zzzzzz 10zzyyyy 10yyyyxx 10xxxxxx
935 // U+400yyxx (invalid)
936 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
937 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd, 0xfffd),
938 "\xfc\x84\x80"));
939 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
940 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd, 0xfffd),
941 "\xfc\xbf\xbf"));
942 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
943 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd, 0xfffd),
944 "\xfd\x80\x80"));
945 // U+7FFCyyxx (invalid)
946 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
947 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd, 0xfffd),
948 "\xfd\xbf\xbf"));
949
950 //
951 // Sequences with four continuation bytes missing
952 //
953
954 // Ill-formed 5-byte sequences.
955 // 111110uu 10zzzzzz 10zzyyyy 10yyyyxx 10xxxxxx
956 // U+uzyyxx (invalid)
957 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
958 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xf8"));
959 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
960 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xf9"));
961 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
962 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xfa"));
963 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
964 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xfb"));
965 // U+3zyyxx (invalid)
966 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
967 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xfb"));
968
969 // Broken overlong sequences.
970 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
971 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xf8"));
972 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
973 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
974 "\xfc\x80"));
975
976 // Ill-formed 6-byte sequences.
977 // 1111110u 10uuuuuu 10zzzzzz 10zzyyyy 10yyyyxx 10xxxxxx
978 // U+uzzyyxx (invalid)
979 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
980 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
981 "\xfc\x84"));
982 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
983 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
984 "\xfc\xbf"));
985 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
986 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
987 "\xfd\x80"));
988 // U+7Fzzyyxx (invalid)
989 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
990 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
991 "\xfd\xbf"));
992
993 //
994 // Sequences with five continuation bytes missing
995 //
996
997 // Ill-formed 6-byte sequences.
998 // 1111110u 10uuuuuu 10zzzzzz 10zzyyyy 10yyyyxx 10xxxxxx
999 // U+uzzyyxx (invalid)
1000 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1001 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xfc"));
1002 // U+uuzzyyxx (invalid)
1003 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1004 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xfd"));
1005
1006 //
1007 // Consecutive sequences with trailing bytes missing
1008 //
1009
1010 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1011 ConvertUTFResultContainer(sourceIllegal)
1012 .withScalars(0xfffd, /**/ 0xfffd, 0xfffd, /**/ 0xfffd, 0xfffd, 0xfffd)
1013 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd)
1014 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd)
1015 .withScalars(0xfffd, /**/ 0xfffd, /**/ 0xfffd, 0xfffd, 0xfffd)
1016 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd)
1017 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
1018 "\xc0" "\xe0\x80" "\xf0\x80\x80"
1019 "\xf8\x80\x80\x80"
1020 "\xfc\x80\x80\x80\x80"
1021 "\xdf" "\xef\xbf" "\xf7\xbf\xbf"
1022 "\xfb\xbf\xbf\xbf"
1023 "\xfd\xbf\xbf\xbf\xbf"));
1024
1025 //
1026 // Overlong UTF-8 sequences
1027 //
1028
1029 // U+002F SOLIDUS
1030 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1031 ConvertUTFResultContainer(conversionOK).withScalars(0x002f), "\x2f"));
1032
1033 // Overlong sequences of the above.
1034 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1035 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
1036 "\xc0\xaf"));
1037 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1038 ConvertUTFResultContainer(sourceIllegal)
1039 .withScalars(0xfffd, 0xfffd, 0xfffd),
1040 "\xe0\x80\xaf"));
1041 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1042 ConvertUTFResultContainer(sourceIllegal)
1043 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd),
1044 "\xf0\x80\x80\xaf"));
1045 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1046 ConvertUTFResultContainer(sourceIllegal)
1047 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
1048 "\xf8\x80\x80\x80\xaf"));
1049 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1050 ConvertUTFResultContainer(sourceIllegal)
1051 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
1052 "\xfc\x80\x80\x80\x80\xaf"));
1053
1054 // U+0000 NULL
1055 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1056 ConvertUTFResultContainer(conversionOK).withScalars(0x0000),
1057 StringRef("\x00", 1)));
1058
1059 // Overlong sequences of the above.
1060 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1061 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
1062 "\xc0\x80"));
1063 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1064 ConvertUTFResultContainer(sourceIllegal)
1065 .withScalars(0xfffd, 0xfffd, 0xfffd),
1066 "\xe0\x80\x80"));
1067 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1068 ConvertUTFResultContainer(sourceIllegal)
1069 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd),
1070 "\xf0\x80\x80\x80"));
1071 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1072 ConvertUTFResultContainer(sourceIllegal)
1073 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
1074 "\xf8\x80\x80\x80\x80"));
1075 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1076 ConvertUTFResultContainer(sourceIllegal)
1077 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
1078 "\xfc\x80\x80\x80\x80\x80"));
1079
1080 // Other overlong sequences.
1081 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1082 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
1083 "\xc0\xbf"));
1084 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1085 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
1086 "\xc1\x80"));
1087 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1088 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
1089 "\xc1\xbf"));
1090 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1091 ConvertUTFResultContainer(sourceIllegal)
1092 .withScalars(0xfffd, 0xfffd, 0xfffd),
1093 "\xe0\x9f\xbf"));
1094 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1095 ConvertUTFResultContainer(sourceIllegal)
1096 .withScalars(0xfffd, 0xfffd, 0xfffd),
1097 "\xed\xa0\x80"));
1098 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1099 ConvertUTFResultContainer(sourceIllegal)
1100 .withScalars(0xfffd, 0xfffd, 0xfffd),
1101 "\xed\xbf\xbf"));
1102 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1103 ConvertUTFResultContainer(sourceIllegal)
1104 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd),
1105 "\xf0\x8f\x80\x80"));
1106 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1107 ConvertUTFResultContainer(sourceIllegal)
1108 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd),
1109 "\xf0\x8f\xbf\xbf"));
1110 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1111 ConvertUTFResultContainer(sourceIllegal)
1112 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
1113 "\xf8\x87\xbf\xbf\xbf"));
1114 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1115 ConvertUTFResultContainer(sourceIllegal)
1116 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
1117 "\xfc\x83\xbf\xbf\xbf\xbf"));
1118
1119 //
1120 // Isolated surrogates
1121 //
1122
1123 // Unicode 6.3.0:
1124 //
1125 // D71. High-surrogate code point: A Unicode code point in the range
1126 // U+D800 to U+DBFF.
1127 //
1128 // D73. Low-surrogate code point: A Unicode code point in the range
1129 // U+DC00 to U+DFFF.
1130
1131 // Note: U+E0100 is <DB40 DD00> in UTF16.
1132
1133 // High surrogates
1134
1135 // U+D800
1136 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1137 ConvertUTFResultContainer(sourceIllegal)
1138 .withScalars(0xfffd, 0xfffd, 0xfffd),
1139 "\xed\xa0\x80"));
1140
1141 // U+DB40
1142 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1143 ConvertUTFResultContainer(sourceIllegal)
1144 .withScalars(0xfffd, 0xfffd, 0xfffd),
1145 "\xed\xac\xa0"));
1146
1147 // U+DBFF
1148 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1149 ConvertUTFResultContainer(sourceIllegal)
1150 .withScalars(0xfffd, 0xfffd, 0xfffd),
1151 "\xed\xaf\xbf"));
1152
1153 // Low surrogates
1154
1155 // U+DC00
1156 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1157 ConvertUTFResultContainer(sourceIllegal)
1158 .withScalars(0xfffd, 0xfffd, 0xfffd),
1159 "\xed\xb0\x80"));
1160
1161 // U+DD00
1162 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1163 ConvertUTFResultContainer(sourceIllegal)
1164 .withScalars(0xfffd, 0xfffd, 0xfffd),
1165 "\xed\xb4\x80"));
1166
1167 // U+DFFF
1168 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1169 ConvertUTFResultContainer(sourceIllegal)
1170 .withScalars(0xfffd, 0xfffd, 0xfffd),
1171 "\xed\xbf\xbf"));
1172
1173 // Surrogate pairs
1174
1175 // U+D800 U+DC00
1176 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1177 ConvertUTFResultContainer(sourceIllegal)
1178 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
1179 "\xed\xa0\x80\xed\xb0\x80"));
1180
1181 // U+D800 U+DD00
1182 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1183 ConvertUTFResultContainer(sourceIllegal)
1184 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
1185 "\xed\xa0\x80\xed\xb4\x80"));
1186
1187 // U+D800 U+DFFF
1188 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1189 ConvertUTFResultContainer(sourceIllegal)
1190 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
1191 "\xed\xa0\x80\xed\xbf\xbf"));
1192
1193 // U+DB40 U+DC00
1194 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1195 ConvertUTFResultContainer(sourceIllegal)
1196 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
1197 "\xed\xac\xa0\xed\xb0\x80"));
1198
1199 // U+DB40 U+DD00
1200 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1201 ConvertUTFResultContainer(sourceIllegal)
1202 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
1203 "\xed\xac\xa0\xed\xb4\x80"));
1204
1205 // U+DB40 U+DFFF
1206 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1207 ConvertUTFResultContainer(sourceIllegal)
1208 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
1209 "\xed\xac\xa0\xed\xbf\xbf"));
1210
1211 // U+DBFF U+DC00
1212 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1213 ConvertUTFResultContainer(sourceIllegal)
1214 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
1215 "\xed\xaf\xbf\xed\xb0\x80"));
1216
1217 // U+DBFF U+DD00
1218 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1219 ConvertUTFResultContainer(sourceIllegal)
1220 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
1221 "\xed\xaf\xbf\xed\xb4\x80"));
1222
1223 // U+DBFF U+DFFF
1224 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1225 ConvertUTFResultContainer(sourceIllegal)
1226 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
1227 "\xed\xaf\xbf\xed\xbf\xbf"));
1228
1229 //
1230 // Noncharacters
1231 //
1232
1233 // Unicode 6.3.0:
1234 //
1235 // D14. Noncharacter: A code point that is permanently reserved for
1236 // internal use and that should never be interchanged. Noncharacters
  // consist of the values U+nFFFE and U+nFFFF (where n is from 0x0 to 0x10)
1238 // and the values U+FDD0..U+FDEF.
1239
1240 // U+FFFE
1241 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1242 ConvertUTFResultContainer(conversionOK).withScalars(0xfffe),
1243 "\xef\xbf\xbe"));
1244
1245 // U+FFFF
1246 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1247 ConvertUTFResultContainer(conversionOK).withScalars(0xffff),
1248 "\xef\xbf\xbf"));
1249
1250 // U+1FFFE
1251 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1252 ConvertUTFResultContainer(conversionOK).withScalars(0x1fffe),
1253 "\xf0\x9f\xbf\xbe"));
1254
1255 // U+1FFFF
1256 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1257 ConvertUTFResultContainer(conversionOK).withScalars(0x1ffff),
1258 "\xf0\x9f\xbf\xbf"));
1259
1260 // U+2FFFE
1261 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1262 ConvertUTFResultContainer(conversionOK).withScalars(0x2fffe),
1263 "\xf0\xaf\xbf\xbe"));
1264
1265 // U+2FFFF
1266 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1267 ConvertUTFResultContainer(conversionOK).withScalars(0x2ffff),
1268 "\xf0\xaf\xbf\xbf"));
1269
1270 // U+3FFFE
1271 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1272 ConvertUTFResultContainer(conversionOK).withScalars(0x3fffe),
1273 "\xf0\xbf\xbf\xbe"));
1274
1275 // U+3FFFF
1276 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1277 ConvertUTFResultContainer(conversionOK).withScalars(0x3ffff),
1278 "\xf0\xbf\xbf\xbf"));
1279
1280 // U+4FFFE
1281 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1282 ConvertUTFResultContainer(conversionOK).withScalars(0x4fffe),
1283 "\xf1\x8f\xbf\xbe"));
1284
1285 // U+4FFFF
1286 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1287 ConvertUTFResultContainer(conversionOK).withScalars(0x4ffff),
1288 "\xf1\x8f\xbf\xbf"));
1289
1290 // U+5FFFE
1291 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1292 ConvertUTFResultContainer(conversionOK).withScalars(0x5fffe),
1293 "\xf1\x9f\xbf\xbe"));
1294
1295 // U+5FFFF
1296 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1297 ConvertUTFResultContainer(conversionOK).withScalars(0x5ffff),
1298 "\xf1\x9f\xbf\xbf"));
1299
1300 // U+6FFFE
1301 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1302 ConvertUTFResultContainer(conversionOK).withScalars(0x6fffe),
1303 "\xf1\xaf\xbf\xbe"));
1304
1305 // U+6FFFF
1306 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1307 ConvertUTFResultContainer(conversionOK).withScalars(0x6ffff),
1308 "\xf1\xaf\xbf\xbf"));
1309
1310 // U+7FFFE
1311 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1312 ConvertUTFResultContainer(conversionOK).withScalars(0x7fffe),
1313 "\xf1\xbf\xbf\xbe"));
1314
1315 // U+7FFFF
1316 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1317 ConvertUTFResultContainer(conversionOK).withScalars(0x7ffff),
1318 "\xf1\xbf\xbf\xbf"));
1319
1320 // U+8FFFE
1321 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1322 ConvertUTFResultContainer(conversionOK).withScalars(0x8fffe),
1323 "\xf2\x8f\xbf\xbe"));
1324
1325 // U+8FFFF
1326 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1327 ConvertUTFResultContainer(conversionOK).withScalars(0x8ffff),
1328 "\xf2\x8f\xbf\xbf"));
1329
1330 // U+9FFFE
1331 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1332 ConvertUTFResultContainer(conversionOK).withScalars(0x9fffe),
1333 "\xf2\x9f\xbf\xbe"));
1334
1335 // U+9FFFF
1336 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1337 ConvertUTFResultContainer(conversionOK).withScalars(0x9ffff),
1338 "\xf2\x9f\xbf\xbf"));
1339
1340 // U+AFFFE
1341 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1342 ConvertUTFResultContainer(conversionOK).withScalars(0xafffe),
1343 "\xf2\xaf\xbf\xbe"));
1344
1345 // U+AFFFF
1346 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1347 ConvertUTFResultContainer(conversionOK).withScalars(0xaffff),
1348 "\xf2\xaf\xbf\xbf"));
1349
1350 // U+BFFFE
1351 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1352 ConvertUTFResultContainer(conversionOK).withScalars(0xbfffe),
1353 "\xf2\xbf\xbf\xbe"));
1354
1355 // U+BFFFF
1356 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1357 ConvertUTFResultContainer(conversionOK).withScalars(0xbffff),
1358 "\xf2\xbf\xbf\xbf"));
1359
1360 // U+CFFFE
1361 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1362 ConvertUTFResultContainer(conversionOK).withScalars(0xcfffe),
1363 "\xf3\x8f\xbf\xbe"));
1364
1365 // U+CFFFF
1366 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1367 ConvertUTFResultContainer(conversionOK).withScalars(0xcfffF),
1368 "\xf3\x8f\xbf\xbf"));
1369
1370 // U+DFFFE
1371 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1372 ConvertUTFResultContainer(conversionOK).withScalars(0xdfffe),
1373 "\xf3\x9f\xbf\xbe"));
1374
1375 // U+DFFFF
1376 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1377 ConvertUTFResultContainer(conversionOK).withScalars(0xdffff),
1378 "\xf3\x9f\xbf\xbf"));
1379
1380 // U+EFFFE
1381 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1382 ConvertUTFResultContainer(conversionOK).withScalars(0xefffe),
1383 "\xf3\xaf\xbf\xbe"));
1384
1385 // U+EFFFF
1386 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1387 ConvertUTFResultContainer(conversionOK).withScalars(0xeffff),
1388 "\xf3\xaf\xbf\xbf"));
1389
1390 // U+FFFFE
1391 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1392 ConvertUTFResultContainer(conversionOK).withScalars(0xffffe),
1393 "\xf3\xbf\xbf\xbe"));
1394
1395 // U+FFFFF
1396 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1397 ConvertUTFResultContainer(conversionOK).withScalars(0xfffff),
1398 "\xf3\xbf\xbf\xbf"));
1399
1400 // U+10FFFE
1401 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1402 ConvertUTFResultContainer(conversionOK).withScalars(0x10fffe),
1403 "\xf4\x8f\xbf\xbe"));
1404
1405 // U+10FFFF
1406 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1407 ConvertUTFResultContainer(conversionOK).withScalars(0x10ffff),
1408 "\xf4\x8f\xbf\xbf"));
1409
1410 // U+FDD0
1411 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1412 ConvertUTFResultContainer(conversionOK).withScalars(0xfdd0),
1413 "\xef\xb7\x90"));
1414
1415 // U+FDD1
1416 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1417 ConvertUTFResultContainer(conversionOK).withScalars(0xfdd1),
1418 "\xef\xb7\x91"));
1419
1420 // U+FDD2
1421 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1422 ConvertUTFResultContainer(conversionOK).withScalars(0xfdd2),
1423 "\xef\xb7\x92"));
1424
1425 // U+FDD3
1426 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1427 ConvertUTFResultContainer(conversionOK).withScalars(0xfdd3),
1428 "\xef\xb7\x93"));
1429
1430 // U+FDD4
1431 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1432 ConvertUTFResultContainer(conversionOK).withScalars(0xfdd4),
1433 "\xef\xb7\x94"));
1434
1435 // U+FDD5
1436 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1437 ConvertUTFResultContainer(conversionOK).withScalars(0xfdd5),
1438 "\xef\xb7\x95"));
1439
1440 // U+FDD6
1441 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1442 ConvertUTFResultContainer(conversionOK).withScalars(0xfdd6),
1443 "\xef\xb7\x96"));
1444
1445 // U+FDD7
1446 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1447 ConvertUTFResultContainer(conversionOK).withScalars(0xfdd7),
1448 "\xef\xb7\x97"));
1449
1450 // U+FDD8
1451 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1452 ConvertUTFResultContainer(conversionOK).withScalars(0xfdd8),
1453 "\xef\xb7\x98"));
1454
1455 // U+FDD9
1456 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1457 ConvertUTFResultContainer(conversionOK).withScalars(0xfdd9),
1458 "\xef\xb7\x99"));
1459
1460 // U+FDDA
1461 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1462 ConvertUTFResultContainer(conversionOK).withScalars(0xfdda),
1463 "\xef\xb7\x9a"));
1464
1465 // U+FDDB
1466 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1467 ConvertUTFResultContainer(conversionOK).withScalars(0xfddb),
1468 "\xef\xb7\x9b"));
1469
1470 // U+FDDC
1471 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1472 ConvertUTFResultContainer(conversionOK).withScalars(0xfddc),
1473 "\xef\xb7\x9c"));
1474
1475 // U+FDDD
1476 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1477 ConvertUTFResultContainer(conversionOK).withScalars(0xfddd),
1478 "\xef\xb7\x9d"));
1479
1480 // U+FDDE
1481 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1482 ConvertUTFResultContainer(conversionOK).withScalars(0xfdde),
1483 "\xef\xb7\x9e"));
1484
1485 // U+FDDF
1486 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1487 ConvertUTFResultContainer(conversionOK).withScalars(0xfddf),
1488 "\xef\xb7\x9f"));
1489
1490 // U+FDE0
1491 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1492 ConvertUTFResultContainer(conversionOK).withScalars(0xfde0),
1493 "\xef\xb7\xa0"));
1494
1495 // U+FDE1
1496 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1497 ConvertUTFResultContainer(conversionOK).withScalars(0xfde1),
1498 "\xef\xb7\xa1"));
1499
1500 // U+FDE2
1501 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1502 ConvertUTFResultContainer(conversionOK).withScalars(0xfde2),
1503 "\xef\xb7\xa2"));
1504
1505 // U+FDE3
1506 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1507 ConvertUTFResultContainer(conversionOK).withScalars(0xfde3),
1508 "\xef\xb7\xa3"));
1509
1510 // U+FDE4
1511 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1512 ConvertUTFResultContainer(conversionOK).withScalars(0xfde4),
1513 "\xef\xb7\xa4"));
1514
1515 // U+FDE5
1516 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1517 ConvertUTFResultContainer(conversionOK).withScalars(0xfde5),
1518 "\xef\xb7\xa5"));
1519
1520 // U+FDE6
1521 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1522 ConvertUTFResultContainer(conversionOK).withScalars(0xfde6),
1523 "\xef\xb7\xa6"));
1524
1525 // U+FDE7
1526 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1527 ConvertUTFResultContainer(conversionOK).withScalars(0xfde7),
1528 "\xef\xb7\xa7"));
1529
1530 // U+FDE8
1531 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1532 ConvertUTFResultContainer(conversionOK).withScalars(0xfde8),
1533 "\xef\xb7\xa8"));
1534
1535 // U+FDE9
1536 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1537 ConvertUTFResultContainer(conversionOK).withScalars(0xfde9),
1538 "\xef\xb7\xa9"));
1539
1540 // U+FDEA
1541 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1542 ConvertUTFResultContainer(conversionOK).withScalars(0xfdea),
1543 "\xef\xb7\xaa"));
1544
1545 // U+FDEB
1546 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1547 ConvertUTFResultContainer(conversionOK).withScalars(0xfdeb),
1548 "\xef\xb7\xab"));
1549
1550 // U+FDEC
1551 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1552 ConvertUTFResultContainer(conversionOK).withScalars(0xfdec),
1553 "\xef\xb7\xac"));
1554
1555 // U+FDED
1556 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1557 ConvertUTFResultContainer(conversionOK).withScalars(0xfded),
1558 "\xef\xb7\xad"));
1559
1560 // U+FDEE
1561 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1562 ConvertUTFResultContainer(conversionOK).withScalars(0xfdee),
1563 "\xef\xb7\xae"));
1564
1565 // U+FDEF
1566 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1567 ConvertUTFResultContainer(conversionOK).withScalars(0xfdef),
1568 "\xef\xb7\xaf"));
1569
1570 // U+FDF0
1571 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1572 ConvertUTFResultContainer(conversionOK).withScalars(0xfdf0),
1573 "\xef\xb7\xb0"));
1574
1575 // U+FDF1
1576 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1577 ConvertUTFResultContainer(conversionOK).withScalars(0xfdf1),
1578 "\xef\xb7\xb1"));
1579
1580 // U+FDF2
1581 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1582 ConvertUTFResultContainer(conversionOK).withScalars(0xfdf2),
1583 "\xef\xb7\xb2"));
1584
1585 // U+FDF3
1586 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1587 ConvertUTFResultContainer(conversionOK).withScalars(0xfdf3),
1588 "\xef\xb7\xb3"));
1589
1590 // U+FDF4
1591 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1592 ConvertUTFResultContainer(conversionOK).withScalars(0xfdf4),
1593 "\xef\xb7\xb4"));
1594
1595 // U+FDF5
1596 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1597 ConvertUTFResultContainer(conversionOK).withScalars(0xfdf5),
1598 "\xef\xb7\xb5"));
1599
1600 // U+FDF6
1601 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1602 ConvertUTFResultContainer(conversionOK).withScalars(0xfdf6),
1603 "\xef\xb7\xb6"));
1604
1605 // U+FDF7
1606 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1607 ConvertUTFResultContainer(conversionOK).withScalars(0xfdf7),
1608 "\xef\xb7\xb7"));
1609
1610 // U+FDF8
1611 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1612 ConvertUTFResultContainer(conversionOK).withScalars(0xfdf8),
1613 "\xef\xb7\xb8"));
1614
1615 // U+FDF9
1616 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1617 ConvertUTFResultContainer(conversionOK).withScalars(0xfdf9),
1618 "\xef\xb7\xb9"));
1619
1620 // U+FDFA
1621 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1622 ConvertUTFResultContainer(conversionOK).withScalars(0xfdfa),
1623 "\xef\xb7\xba"));
1624
1625 // U+FDFB
1626 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1627 ConvertUTFResultContainer(conversionOK).withScalars(0xfdfb),
1628 "\xef\xb7\xbb"));
1629
1630 // U+FDFC
1631 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1632 ConvertUTFResultContainer(conversionOK).withScalars(0xfdfc),
1633 "\xef\xb7\xbc"));
1634
1635 // U+FDFD
1636 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1637 ConvertUTFResultContainer(conversionOK).withScalars(0xfdfd),
1638 "\xef\xb7\xbd"));
1639
1640 // U+FDFE
1641 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1642 ConvertUTFResultContainer(conversionOK).withScalars(0xfdfe),
1643 "\xef\xb7\xbe"));
1644
1645 // U+FDFF
1646 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1647 ConvertUTFResultContainer(conversionOK).withScalars(0xfdff),
1648 "\xef\xb7\xbf"));
1649 }
1650
TEST(ConvertUTFTest, UTF8ToUTF32PartialLenient) {
  // Every check in this test passes `true` as the trailing argument of
  // CheckConvertUTF8ToUnicodeScalars.
  // NOTE(review): judging by the test name this presumably selects
  // partial-input handling; the helper is declared outside this chunk, so
  // confirm against its signature.

  // Builds the check for an input that is expected to report sourceExhausted
  // with no scalars added to the expectation. A fresh expectation object is
  // constructed on every call, and every input below keeps its own
  // EXPECT_TRUE so that a failure is reported at the line of that input.
  auto CheckExhausted = [](const char *Input) {
    return CheckConvertUTF8ToUnicodeScalars(
        ConvertUTFResultContainer(sourceExhausted), Input, true);
  };

  // A complete sequence still converts: U+0041 LATIN CAPITAL LETTER A.
  EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
      ConvertUTFResultContainer(conversionOK).withScalars(0x0041), "\x41",
      true));

  //
  // Sequences with one continuation byte missing
  //

  // Lead byte of a 2-byte sequence with nothing after it.
  EXPECT_TRUE(CheckExhausted("\xc2"));
  EXPECT_TRUE(CheckExhausted("\xdf"));

  // First two bytes of a 3-byte sequence.
  EXPECT_TRUE(CheckExhausted("\xe0\xa0"));
  EXPECT_TRUE(CheckExhausted("\xe0\xbf"));
  EXPECT_TRUE(CheckExhausted("\xe1\x80"));
  EXPECT_TRUE(CheckExhausted("\xec\xbf"));
  EXPECT_TRUE(CheckExhausted("\xed\x80"));
  EXPECT_TRUE(CheckExhausted("\xed\x9f"));
  EXPECT_TRUE(CheckExhausted("\xee\x80"));
  EXPECT_TRUE(CheckExhausted("\xef\xbf"));

  // First three bytes of a 4-byte sequence.
  EXPECT_TRUE(CheckExhausted("\xf0\x90\x80"));
  EXPECT_TRUE(CheckExhausted("\xf0\xbf\xbf"));
  EXPECT_TRUE(CheckExhausted("\xf1\x80\x80"));
  EXPECT_TRUE(CheckExhausted("\xf3\xbf\xbf"));
  EXPECT_TRUE(CheckExhausted("\xf4\x80\x80"));
  EXPECT_TRUE(CheckExhausted("\xf4\x8f\xbf"));

  // A complete scalar followed by a truncated sequence: U+0041 is expected in
  // the output and the result is still sourceExhausted.
  EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
      ConvertUTFResultContainer(sourceExhausted).withScalars(0x0041),
      "\x41\xc2", true));
}
1714
1715