1 // Copyright 2014 The PDFium Authors
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
6
7 #include "core/fxcrt/fx_unicode.h"
8
9 #include <stddef.h>
10
11 #include <iterator>
12
13 #include "core/fxcrt/check.h"
14 #include "core/fxcrt/check_op.h"
15 #include "core/fxcrt/compiler_specific.h"
16
17 namespace {
18
19 // Format of uint16_t values in kTextLayoutCodeProperties[].
20 constexpr uint16_t kBidiClassBitPos = 0;
21 constexpr uint16_t kBidiClassBitCount = 5;
22 constexpr uint16_t kBidiClassBitMask =
23 (((1u << kBidiClassBitCount) - 1) << kBidiClassBitPos);
24
25 constexpr uint16_t kMirrorBitPos = 5;
26 constexpr uint16_t kMirrorBitCount = 9;
27 constexpr uint16_t kMirrorMax = (1 << kMirrorBitCount) - 1;
28
29 #undef CHARPROP____
30 #define CHARPROP____(mirror, ct, bd, bt) \
31 ((mirror << kMirrorBitPos) | \
32 (static_cast<uint16_t>(FX_BIDICLASS::bd) << kBidiClassBitPos)),
33 constexpr uint16_t kTextLayoutCodeProperties[] = {
34 #include "core/fxcrt/fx_ucddata.inc" // NOLINT(build/include)
35 };
36 #undef CHARPROP____
37
38 static_assert(std::size(kTextLayoutCodeProperties) == 65536,
39 "missing characters");
40
GetUnicodeProperties(wchar_t wch)41 uint16_t GetUnicodeProperties(wchar_t wch) {
42 size_t idx = static_cast<size_t>(wch);
43 if (idx < std::size(kTextLayoutCodeProperties)) {
44 // SAFETY: `std::size(kTextLayoutCodeProperties)` is the size of the table,
45 // so the condition above verifies `idx` is in range.
46 return UNSAFE_BUFFERS(kTextLayoutCodeProperties[idx]);
47 }
48 return 0;
49 }
50
51 #ifdef PDF_ENABLE_XFA
// Format of uint16_t values in kExtendedTextLayoutCodeProperties[]:
//   bits [0, 6):  break type.
//   bits [6, 10): char type.
constexpr uint16_t kBreakTypeBitPos = 0;
constexpr uint16_t kBreakTypeBitCount = 6;
constexpr uint16_t kBreakTypeBitMask =
    ((1u << kBreakTypeBitCount) - 1u) << kBreakTypeBitPos;

constexpr uint16_t kCharTypeBitPos = 6;
constexpr uint16_t kCharTypeBitCount = 4;
constexpr uint16_t kCharTypeBitMask =
    ((1u << kCharTypeBitCount) - 1u) << kCharTypeBitPos;
62
// Expand every CHARPROP____ row of fx_ucddata.inc into one packed uint16_t,
// keeping only the char type (`ct`) and break property (`bt`) columns; the
// `mirror` and `bd` columns are ignored by this expansion.
#undef CHARPROP____
#define CHARPROP____(mirror, ct, bd, bt)                          \
  ((static_cast<uint16_t>(FX_CHARTYPE::ct) << kCharTypeBitPos) | \
   (static_cast<uint16_t>(FX_BREAKPROPERTY::bt) << kBreakTypeBitPos)),
constexpr uint16_t kExtendedTextLayoutCodeProperties[] = {
#include "core/fxcrt/fx_ucddata.inc"  // NOLINT(build/include)
};
#undef CHARPROP____

// The table must provide exactly 65536 entries.
static_assert(std::size(kExtendedTextLayoutCodeProperties) == 65536,
              "missing characters");
74
GetExtendedUnicodeProperties(wchar_t wch)75 uint16_t GetExtendedUnicodeProperties(wchar_t wch) {
76 size_t idx = static_cast<size_t>(wch);
77 if (idx < std::size(kExtendedTextLayoutCodeProperties)) {
78 // SAFETY: `std::size(kExtendedTextLayoutCodeProperties)` is the size of
79 // the table, so the condition above verifies `idx` is in range.
80 return UNSAFE_BUFFERS(kExtendedTextLayoutCodeProperties[idx]);
81 }
82 return 0;
83 }
84
85 #endif // PDF_ENABLE_XFA
86
// Mirror characters, addressed by the mirror index that is packed into the
// entries of kTextLayoutCodeProperties[]. GetMirrorChar() extracts that index
// from a character's properties and returns the entry found here.
constexpr uint16_t kFXTextLayoutBidiMirror[] = {
    0x0029, 0x0028, 0x003E, 0x003C, 0x005D, 0x005B, 0x007D, 0x007B, 0x00BB,
    0x00AB, 0x0F3B, 0x0F3A, 0x0F3D, 0x0F3C, 0x169C, 0x169B, 0x2019, 0x2018,
    0x201D, 0x201C, 0x203A, 0x2039, 0x2046, 0x2045, 0x207E, 0x207D, 0x208E,
    0x208D, 0x220B, 0x220C, 0x220D, 0x2208, 0x2209, 0x220A, 0x29F5, 0x223D,
    0x223C, 0x22CD, 0x2253, 0x2252, 0x2255, 0x2254, 0x2265, 0x2264, 0x2267,
    0x2266, 0x2269, 0x2268, 0x226B, 0x226A, 0x226F, 0x226E, 0x2271, 0x2270,
    0x2273, 0x2272, 0x2275, 0x2274, 0x2277, 0x2276, 0x2279, 0x2278, 0x227B,
    0x227A, 0x227D, 0x227C, 0x227F, 0x227E, 0x2281, 0x2280, 0x2283, 0x2282,
    0x2285, 0x2284, 0x2287, 0x2286, 0x2289, 0x2288, 0x228B, 0x228A, 0x2290,
    0x228F, 0x2292, 0x2291, 0x29B8, 0x22A3, 0x22A2, 0x2ADE, 0x2AE4, 0x2AE3,
    0x2AE5, 0x22B1, 0x22B0, 0x22B3, 0x22B2, 0x22B5, 0x22B4, 0x22B7, 0x22B6,
    0x22CA, 0x22C9, 0x22CC, 0x22CB, 0x2243, 0x22D1, 0x22D0, 0x22D7, 0x22D6,
    0x22D9, 0x22D8, 0x22DB, 0x22DA, 0x22DD, 0x22DC, 0x22DF, 0x22DE, 0x22E1,
    0x22E0, 0x22E3, 0x22E2, 0x22E5, 0x22E4, 0x22E7, 0x22E6, 0x22E9, 0x22E8,
    0x22EB, 0x22EA, 0x22ED, 0x22EC, 0x22F1, 0x22F0, 0x22FA, 0x22FB, 0x22FC,
    0x22FD, 0x22FE, 0x22F2, 0x22F3, 0x22F4, 0x22F6, 0x22F7, 0x2309, 0x2308,
    0x230B, 0x230A, 0x232A, 0x2329, 0x2769, 0x2768, 0x276B, 0x276A, 0x276D,
    0x276C, 0x276F, 0x276E, 0x2771, 0x2770, 0x2773, 0x2772, 0x2775, 0x2774,
    0x27C4, 0x27C3, 0x27C6, 0x27C5, 0x27C9, 0x27C8, 0x27D6, 0x27D5, 0x27DE,
    0x27DD, 0x27E3, 0x27E2, 0x27E5, 0x27E4, 0x27E7, 0x27E6, 0x27E9, 0x27E8,
    0x27EB, 0x27EA, 0x27ED, 0x27EC, 0x27EF, 0x27EE, 0x2984, 0x2983, 0x2986,
    0x2985, 0x2988, 0x2987, 0x298A, 0x2989, 0x298C, 0x298B, 0x2990, 0x298F,
    0x298E, 0x298D, 0x2992, 0x2991, 0x2994, 0x2993, 0x2996, 0x2995, 0x2998,
    0x2997, 0x2298, 0x29C1, 0x29C0, 0x29C5, 0x29C4, 0x29D0, 0x29CF, 0x29D2,
    0x29D1, 0x29D5, 0x29D4, 0x29D9, 0x29D8, 0x29DB, 0x29DA, 0x2215, 0x29F9,
    0x29F8, 0x29FD, 0x29FC, 0x2A2C, 0x2A2B, 0x2A2E, 0x2A2D, 0x2A35, 0x2A34,
    0x2A3D, 0x2A3C, 0x2A65, 0x2A64, 0x2A7A, 0x2A79, 0x2A7E, 0x2A7D, 0x2A80,
    0x2A7F, 0x2A82, 0x2A81, 0x2A84, 0x2A83, 0x2A8C, 0x2A8B, 0x2A92, 0x2A91,
    0x2A94, 0x2A93, 0x2A96, 0x2A95, 0x2A98, 0x2A97, 0x2A9A, 0x2A99, 0x2A9C,
    0x2A9B, 0x2AA2, 0x2AA1, 0x2AA7, 0x2AA6, 0x2AA9, 0x2AA8, 0x2AAB, 0x2AAA,
    0x2AAD, 0x2AAC, 0x2AB0, 0x2AAF, 0x2AB4, 0x2AB3, 0x2ABC, 0x2ABB, 0x2ABE,
    0x2ABD, 0x2AC0, 0x2ABF, 0x2AC2, 0x2AC1, 0x2AC4, 0x2AC3, 0x2AC6, 0x2AC5,
    0x2ACE, 0x2ACD, 0x2AD0, 0x2ACF, 0x2AD2, 0x2AD1, 0x2AD4, 0x2AD3, 0x2AD6,
    0x2AD5, 0x22A6, 0x22A9, 0x22A8, 0x22AB, 0x2AED, 0x2AEC, 0x2AF8, 0x2AF7,
    0x2AFA, 0x2AF9, 0x2E03, 0x2E02, 0x2E05, 0x2E04, 0x2E0A, 0x2E09, 0x2E0D,
    0x2E0C, 0x2E1D, 0x2E1C, 0x2E21, 0x2E20, 0x2E23, 0x2E22, 0x2E25, 0x2E24,
    0x2E27, 0x2E26, 0x2E29, 0x2E28, 0x3009, 0x3008, 0x300B, 0x300A, 0x300D,
    0x300C, 0x300F, 0x300E, 0x3011, 0x3010, 0x3015, 0x3014, 0x3017, 0x3016,
    0x3019, 0x3018, 0x301B, 0x301A, 0xFE5A, 0xFE59, 0xFE5C, 0xFE5B, 0xFE5E,
    0xFE5D, 0xFE65, 0xFE64, 0xFF09, 0xFF08, 0xFF1E, 0xFF1C, 0xFF3D, 0xFF3B,
    0xFF5D, 0xFF5B, 0xFF60, 0xFF5F, 0xFF63, 0xFF62,
};
130
// Check that the mirror indices in the fx_ucddata.inc table are in bounds.
// Each CHARPROP____ row expands to a static_assert requiring its mirror index
// to be either kMirrorMax (for which GetMirrorChar() returns the character
// unchanged) or a valid index into kFXTextLayoutBidiMirror[].
#undef CHARPROP____
#define CHARPROP____(mirror, ct, bd, bt)                                    \
  static_assert(                                                            \
      mirror == kMirrorMax || mirror < std::size(kFXTextLayoutBidiMirror), \
      "Bad mirror index");
#include "core/fxcrt/fx_ucddata.inc"  // NOLINT(build/include)
#undef CHARPROP____
139
140 } // namespace
141
142 namespace pdfium::unicode {
143
GetMirrorChar(wchar_t wch)144 wchar_t GetMirrorChar(wchar_t wch) {
145 uint16_t prop = GetUnicodeProperties(wch);
146 size_t idx = prop >> kMirrorBitPos;
147 if (idx == kMirrorMax) {
148 return wch;
149 }
150 CHECK_LT(idx, std::size(kFXTextLayoutBidiMirror));
151 // SAFETY: `std::size(kFXTextLayoutBidiMirror)` is the size of the table, so
152 // the CHECK() above verifies `idx` is in range.
153 return UNSAFE_BUFFERS(kFXTextLayoutBidiMirror[idx]);
154 }
155
GetBidiClass(wchar_t wch)156 FX_BIDICLASS GetBidiClass(wchar_t wch) {
157 uint16_t prop = GetUnicodeProperties(wch);
158 uint16_t result = (prop & kBidiClassBitMask) >> kBidiClassBitPos;
159 DCHECK(result <= static_cast<uint16_t>(FX_BIDICLASS::kPDF));
160 return static_cast<FX_BIDICLASS>(result);
161 }
162
163 #ifdef PDF_ENABLE_XFA
GetCharType(wchar_t wch)164 FX_CHARTYPE GetCharType(wchar_t wch) {
165 uint16_t prop = GetExtendedUnicodeProperties(wch);
166 uint16_t result = (prop & kCharTypeBitMask) >> kCharTypeBitPos;
167 DCHECK(result <= static_cast<uint16_t>(FX_CHARTYPE::kArabic));
168 return static_cast<FX_CHARTYPE>(result);
169 }
170
GetBreakProperty(wchar_t wch)171 FX_BREAKPROPERTY GetBreakProperty(wchar_t wch) {
172 uint16_t prop = GetExtendedUnicodeProperties(wch);
173 uint16_t result = (prop & kBreakTypeBitMask) >> kBreakTypeBitPos;
174 DCHECK(result <= static_cast<uint16_t>(FX_BREAKPROPERTY::kTB));
175 return static_cast<FX_BREAKPROPERTY>(result);
176 }
177 #endif // PDF_ENABLE_XFA
178
179 } // namespace pdfium::unicode
180