1 //===----------------------------------------------------------------------===//
2 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
3 // See https://llvm.org/LICENSE.txt for license information.
4 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
5 //
6 //===----------------------------------------------------------------------===//
7
8 // UNSUPPORTED: c++03, c++11, c++14, c++17, c++20
9 // UNSUPPORTED: no-filesystem
10 // UNSUPPORTED: GCC-ALWAYS_INLINE-FIXME
11
12 // <print>
13
14 // Tests the UTF-8 to UTF-16/32 encoding.
15 // UTF-16 is used on Windows to write to the Unicode API.
16 // UTF-32 is used to test the Windows behaviour on Linux using 32-bit wchar_t.
17
18 #include <algorithm>
19 #include <array>
20 #include <cassert>
21 #include <print>
22 #include <string_view>
23
24 #include "test_macros.h"
25 #include "make_string.h"
26
27 #define SV(S) MAKE_STRING_VIEW(CharT, S)
28
// Transcodes the UTF-8 text in \a input to \a CharT and validates the result.
//
// The output buffer is pre-filled with the sentinel character '*'. After the
// transcoded prefix has been compared against \a expected, the remainder of
// the buffer is scanned to validate it still only contains the sentinel, i.e.
// nothing was written at or after the position returned by the transcoder.
template <class CharT>
constexpr void test(std::basic_string_view<CharT> expected, std::string_view input) {
  std::array<CharT, 1024> buffer;
  assert(expected.size() < buffer.size());
  std::ranges::fill(buffer, CharT('*'));

  // The transcoder returns a raw pointer. Use raw pointers for the buffer
  // bounds too; std::array<T, N>::iterator is implementation-defined and is
  // not required to be (or be comparable with) a CharT*.
  CharT* const first = buffer.data();
  CharT* const last  = first + buffer.size();

  CharT* out = std::__unicode::__transcode(input.begin(), input.end(), first);

  assert(std::basic_string_view<CharT>(first, out) == expected);

  // Everything from the returned position onwards must be untouched.
  out = std::find_if(out, last, [](CharT c) { return c != CharT('*'); });
  assert(out == last);
}
42
43 template <class CharT>
test()44 constexpr void test() {
45 // *** Test valid UTF-8 ***
46 #define TEST(S) test(SV(S), S)
47 TEST("hello world");
48 // copied from benchmarks/std_format_spec_string_unicode.bench.cpp
49 TEST("Lorem ipsum dolor sit amet, ne sensibus evertitur aliquando his. Iuvaret fabulas qui ex.");
50 TEST("Lōrem ipsūm dolor sīt æmeÞ, ea vel nostrud feuġǣit, muciūs tēmporiȝusrefērrēnÞur no mel.");
51 TEST("Лорем ипсум долор сит амет, еу диам тамяуам принципес вис, еяуидем цонцептам диспутандо");
52 TEST("入ト年媛ろ舗学ラロ準募ケカ社金スノ屋検れう策他セヲシ引口ぎ集7独ぱクふ出車ぽでぱ円輪ルノ受打わ。");
53 TEST("\U0001f636\u200d\U0001f32b\ufe0f");
54 #undef TEST
55
56 // *** Test invalid UTF-8 ***
57 test(SV("\ufffd"), "\xc3");
58 test(SV("\ufffd("), "\xc3\x28");
59
60 // Surrogate range
61 test(SV("\ufffd"), "\xed\xa0\x80"); // U+D800
62 test(SV("\ufffd"), "\xed\xaf\xbf"); // U+DBFF
63 test(SV("\ufffd"), "\xed\xbf\x80"); // U+DC00
64 test(SV("\ufffd"), "\xed\xbf\xbf"); // U+DFFF
65
66 // Beyond valid values
67 test(SV("\ufffd"), "\xf4\x90\x80\x80"); // U+110000
68 test(SV("\ufffd"), "\xf4\xbf\xbf\xbf"); // U+11FFFF
69
70 // Validates http://unicode.org/review/pr-121.html option 3.
71 test(SV("\u0061\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD\u0062"), "\x61\xF1\x80\x80\xE1\x80\xC2\x62");
72 }
73
test()74 constexpr bool test() {
75 test<char16_t>();
76 test<char32_t>();
77 #if !defined(TEST_HAS_NO_WIDE_CHARACTERS)
78 test<wchar_t>();
79 #endif
80 return true;
81 }
82
main(int,char **)83 int main(int, char**) {
84 test();
85 static_assert(test());
86
87 return 0;
88 }
89