• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 //===----------------------------------------------------------------------===//
2 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
3 // See https://llvm.org/LICENSE.txt for license information.
4 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
5 //
6 //===----------------------------------------------------------------------===//
7 
8 // UNSUPPORTED: c++03, c++11, c++14, c++17, c++20
9 // UNSUPPORTED: no-filesystem
10 // UNSUPPORTED: GCC-ALWAYS_INLINE-FIXME
11 
12 // <print>
13 
14 // Tests the UTF-8 to UTF-16/32 encoding.
15 // UTF-16 is used on Windows to write to the Unicode API.
16 // UTF-32 is used to test the Windows behaviour on Linux using 32-bit wchar_t.
17 
18 #include <algorithm>
19 #include <array>
20 #include <cassert>
21 #include <print>
22 #include <string_view>
23 
24 #include "test_macros.h"
25 #include "make_string.h"
26 
27 #define SV(S) MAKE_STRING_VIEW(CharT, S)
28 
// Transcodes the UTF-8 text in \p input to CharT code units and verifies that
// the output equals \p expected.
//
// The output buffer is pre-filled with a '*' sentinel. After transcoding, every
// element at or after the returned output position must still hold that
// sentinel, which detects writes beyond the reported end of the output.
template <class CharT>
constexpr void test(std::basic_string_view<CharT> expected, std::string_view input) {
  assert(expected.size() < 1024);
  std::array<CharT, 1024> buffer;
  std::ranges::fill(buffer, CharT('*'));

  // Describe the end of the buffer with a raw pointer. std::array's iterator
  // type is implementation-defined and need not be CharT*, so buffer.end()
  // cannot portably be mixed with the CharT* output position in std::find_if
  // or in operator==.
  CharT* const buffer_end = buffer.data() + buffer.size();

  CharT* out = std::__unicode::__transcode(input.begin(), input.end(), buffer.data());

  assert(std::basic_string_view<CharT>(buffer.data(), out) == expected);

  // Nothing may have been written at or after the returned position.
  out = std::find_if(out, buffer_end, [](CharT c) { return c != CharT('*'); });
  assert(out == buffer_end);
}
42 
43 template <class CharT>
test()44 constexpr void test() {
45   // *** Test valid UTF-8 ***
46 #define TEST(S) test(SV(S), S)
47   TEST("hello world");
48   // copied from benchmarks/std_format_spec_string_unicode.bench.cpp
49   TEST("Lorem ipsum dolor sit amet, ne sensibus evertitur aliquando his. Iuvaret fabulas qui ex.");
50   TEST("Lōrem ipsūm dolor sīt æmeÞ, ea vel nostrud feuġǣit, muciūs tēmporiȝusrefērrēnÞur no mel.");
51   TEST("Лорем ипсум долор сит амет, еу диам тамяуам принципес вис, еяуидем цонцептам диспутандо");
52   TEST("入ト年媛ろ舗学ラロ準募ケカ社金スノ屋検れう策他セヲシ引口ぎ集7独ぱクふ出車ぽでぱ円輪ルノ受打わ。");
53   TEST("\U0001f636\u200d\U0001f32b\ufe0f");
54 #undef TEST
55 
56   // *** Test invalid UTF-8 ***
57   test(SV("\ufffd"), "\xc3");
58   test(SV("\ufffd("), "\xc3\x28");
59 
60   // Surrogate range
61   test(SV("\ufffd"), "\xed\xa0\x80"); // U+D800
62   test(SV("\ufffd"), "\xed\xaf\xbf"); // U+DBFF
63   test(SV("\ufffd"), "\xed\xbf\x80"); // U+DC00
64   test(SV("\ufffd"), "\xed\xbf\xbf"); // U+DFFF
65 
66   // Beyond valid values
67   test(SV("\ufffd"), "\xf4\x90\x80\x80"); // U+110000
68   test(SV("\ufffd"), "\xf4\xbf\xbf\xbf"); // U+11FFFF
69 
70   // Validates http://unicode.org/review/pr-121.html option 3.
71   test(SV("\u0061\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD\u0062"), "\x61\xF1\x80\x80\xE1\x80\xC2\x62");
72 }
73 
test()74 constexpr bool test() {
75   test<char16_t>();
76   test<char32_t>();
77 #if !defined(TEST_HAS_NO_WIDE_CHARACTERS)
78   test<wchar_t>();
79 #endif
80   return true;
81 }
82 
main(int,char **)83 int main(int, char**) {
84   test();
85   static_assert(test());
86 
87   return 0;
88 }
89