• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright 2014 The PDFium Authors
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
6 
7 #include "core/fxcrt/fx_unicode.h"
8 
9 #include <stddef.h>
10 
11 #include <iterator>
12 
13 #include "core/fxcrt/check.h"
14 #include "core/fxcrt/check_op.h"
15 #include "core/fxcrt/compiler_specific.h"
16 
17 namespace {
18 
19 // Format of uint16_t values in kTextLayoutCodeProperties[].
// Bits [0, 5) carry the FX_BIDICLASS value and bits [5, 14) carry the mirror
// field, as spelled out by the position/count constants below.
20 constexpr uint16_t kBidiClassBitPos = 0;
21 constexpr uint16_t kBidiClassBitCount = 5;
22 constexpr uint16_t kBidiClassBitMask =
23     (((1u << kBidiClassBitCount) - 1) << kBidiClassBitPos);
24 
// The mirror field is an index into kFXTextLayoutBidiMirror[]. The all-ones
// value, kMirrorMax, is reserved: GetMirrorChar() treats it as "this character
// has no mirror" and returns the character unchanged.
25 constexpr uint16_t kMirrorBitPos = 5;
26 constexpr uint16_t kMirrorBitCount = 9;
27 constexpr uint16_t kMirrorMax = (1 << kMirrorBitCount) - 1;
28 
// X-macro: fx_ucddata.inc is included with CHARPROP____ defined so that each
// invocation expands to one packed array element followed by a comma. Only the
// `mirror` and `bd` arguments are used here; `ct` and `bt` are ignored. The
// macro is undefined again right after the include so it can be redefined for
// the other expansions of the same data file later in this file.
29 #undef CHARPROP____
30 #define CHARPROP____(mirror, ct, bd, bt) \
31   ((mirror << kMirrorBitPos) |           \
32    (static_cast<uint16_t>(FX_BIDICLASS::bd) << kBidiClassBitPos)),
33 constexpr uint16_t kTextLayoutCodeProperties[] = {
34 #include "core/fxcrt/fx_ucddata.inc"  // NOLINT(build/include)
35 };
36 #undef CHARPROP____
37 
// The table must have exactly 65536 elements, one per 16-bit code value.
38 static_assert(std::size(kTextLayoutCodeProperties) == 65536,
39               "missing characters");
40 
GetUnicodeProperties(wchar_t wch)41 uint16_t GetUnicodeProperties(wchar_t wch) {
42   size_t idx = static_cast<size_t>(wch);
43   if (idx < std::size(kTextLayoutCodeProperties)) {
44     // SAFETY: `std::size(kTextLayoutCodeProperties)` is the size of the table,
45     // so the condition above verifies `idx` is in range.
46     return UNSAFE_BUFFERS(kTextLayoutCodeProperties[idx]);
47   }
48   return 0;
49 }
50 
51 #ifdef PDF_ENABLE_XFA
52 // Format of uint16_t values in kExtendedTextLayoutCodeProperties[].
// Bits [0, 6) carry the FX_BREAKPROPERTY value and bits [6, 10) carry the
// FX_CHARTYPE value, as spelled out by the position/count constants below.
53 constexpr uint16_t kBreakTypeBitPos = 0;
54 constexpr uint16_t kBreakTypeBitCount = 6;
55 constexpr uint16_t kBreakTypeBitMask =
56     (((1u << kBreakTypeBitCount) - 1) << kBreakTypeBitPos);
57 
58 constexpr uint16_t kCharTypeBitPos = 6;
59 constexpr uint16_t kCharTypeBitCount = 4;
60 constexpr uint16_t kCharTypeBitMask =
61     (((1u << kCharTypeBitCount) - 1) << kCharTypeBitPos);
62 
// X-macro: fx_ucddata.inc is included with CHARPROP____ defined so that each
// invocation expands to one packed array element followed by a comma. Only the
// `ct` and `bt` arguments are used here; `mirror` and `bd` are ignored.
63 #undef CHARPROP____
64 #define CHARPROP____(mirror, ct, bd, bt)                         \
65   ((static_cast<uint16_t>(FX_CHARTYPE::ct) << kCharTypeBitPos) | \
66    (static_cast<uint16_t>(FX_BREAKPROPERTY::bt) << kBreakTypeBitPos)),
67 constexpr uint16_t kExtendedTextLayoutCodeProperties[] = {
68 #include "core/fxcrt/fx_ucddata.inc"  // NOLINT(build/include)
69 };
70 #undef CHARPROP____
71 
// The table must have exactly 65536 elements, one per 16-bit code value.
72 static_assert(std::size(kExtendedTextLayoutCodeProperties) == 65536,
73               "missing characters");
74 
GetExtendedUnicodeProperties(wchar_t wch)75 uint16_t GetExtendedUnicodeProperties(wchar_t wch) {
76   size_t idx = static_cast<size_t>(wch);
77   if (idx < std::size(kExtendedTextLayoutCodeProperties)) {
78     // SAFETY: `std::size(kExtendedTextLayoutCodeProperties)` is the size of
79     // the table, so the condition above verifies `idx` is in range.
80     return UNSAFE_BUFFERS(kExtendedTextLayoutCodeProperties[idx]);
81   }
82   return 0;
83 }
84 
85 #endif  // PDF_ENABLE_XFA
86 
// Mirror characters, addressed by the mirror field stored in each
// kTextLayoutCodeProperties[] entry. GetMirrorChar() returns the element at
// that index. The generated static_asserts that follow this table require
// every mirror index in fx_ucddata.inc to be either kMirrorMax or a valid
// index into this array.
87 constexpr uint16_t kFXTextLayoutBidiMirror[] = {
88     0x0029, 0x0028, 0x003E, 0x003C, 0x005D, 0x005B, 0x007D, 0x007B, 0x00BB,
89     0x00AB, 0x0F3B, 0x0F3A, 0x0F3D, 0x0F3C, 0x169C, 0x169B, 0x2019, 0x2018,
90     0x201D, 0x201C, 0x203A, 0x2039, 0x2046, 0x2045, 0x207E, 0x207D, 0x208E,
91     0x208D, 0x220B, 0x220C, 0x220D, 0x2208, 0x2209, 0x220A, 0x29F5, 0x223D,
92     0x223C, 0x22CD, 0x2253, 0x2252, 0x2255, 0x2254, 0x2265, 0x2264, 0x2267,
93     0x2266, 0x2269, 0x2268, 0x226B, 0x226A, 0x226F, 0x226E, 0x2271, 0x2270,
94     0x2273, 0x2272, 0x2275, 0x2274, 0x2277, 0x2276, 0x2279, 0x2278, 0x227B,
95     0x227A, 0x227D, 0x227C, 0x227F, 0x227E, 0x2281, 0x2280, 0x2283, 0x2282,
96     0x2285, 0x2284, 0x2287, 0x2286, 0x2289, 0x2288, 0x228B, 0x228A, 0x2290,
97     0x228F, 0x2292, 0x2291, 0x29B8, 0x22A3, 0x22A2, 0x2ADE, 0x2AE4, 0x2AE3,
98     0x2AE5, 0x22B1, 0x22B0, 0x22B3, 0x22B2, 0x22B5, 0x22B4, 0x22B7, 0x22B6,
99     0x22CA, 0x22C9, 0x22CC, 0x22CB, 0x2243, 0x22D1, 0x22D0, 0x22D7, 0x22D6,
100     0x22D9, 0x22D8, 0x22DB, 0x22DA, 0x22DD, 0x22DC, 0x22DF, 0x22DE, 0x22E1,
101     0x22E0, 0x22E3, 0x22E2, 0x22E5, 0x22E4, 0x22E7, 0x22E6, 0x22E9, 0x22E8,
102     0x22EB, 0x22EA, 0x22ED, 0x22EC, 0x22F1, 0x22F0, 0x22FA, 0x22FB, 0x22FC,
103     0x22FD, 0x22FE, 0x22F2, 0x22F3, 0x22F4, 0x22F6, 0x22F7, 0x2309, 0x2308,
104     0x230B, 0x230A, 0x232A, 0x2329, 0x2769, 0x2768, 0x276B, 0x276A, 0x276D,
105     0x276C, 0x276F, 0x276E, 0x2771, 0x2770, 0x2773, 0x2772, 0x2775, 0x2774,
106     0x27C4, 0x27C3, 0x27C6, 0x27C5, 0x27C9, 0x27C8, 0x27D6, 0x27D5, 0x27DE,
107     0x27DD, 0x27E3, 0x27E2, 0x27E5, 0x27E4, 0x27E7, 0x27E6, 0x27E9, 0x27E8,
108     0x27EB, 0x27EA, 0x27ED, 0x27EC, 0x27EF, 0x27EE, 0x2984, 0x2983, 0x2986,
109     0x2985, 0x2988, 0x2987, 0x298A, 0x2989, 0x298C, 0x298B, 0x2990, 0x298F,
110     0x298E, 0x298D, 0x2992, 0x2991, 0x2994, 0x2993, 0x2996, 0x2995, 0x2998,
111     0x2997, 0x2298, 0x29C1, 0x29C0, 0x29C5, 0x29C4, 0x29D0, 0x29CF, 0x29D2,
112     0x29D1, 0x29D5, 0x29D4, 0x29D9, 0x29D8, 0x29DB, 0x29DA, 0x2215, 0x29F9,
113     0x29F8, 0x29FD, 0x29FC, 0x2A2C, 0x2A2B, 0x2A2E, 0x2A2D, 0x2A35, 0x2A34,
114     0x2A3D, 0x2A3C, 0x2A65, 0x2A64, 0x2A7A, 0x2A79, 0x2A7E, 0x2A7D, 0x2A80,
115     0x2A7F, 0x2A82, 0x2A81, 0x2A84, 0x2A83, 0x2A8C, 0x2A8B, 0x2A92, 0x2A91,
116     0x2A94, 0x2A93, 0x2A96, 0x2A95, 0x2A98, 0x2A97, 0x2A9A, 0x2A99, 0x2A9C,
117     0x2A9B, 0x2AA2, 0x2AA1, 0x2AA7, 0x2AA6, 0x2AA9, 0x2AA8, 0x2AAB, 0x2AAA,
118     0x2AAD, 0x2AAC, 0x2AB0, 0x2AAF, 0x2AB4, 0x2AB3, 0x2ABC, 0x2ABB, 0x2ABE,
119     0x2ABD, 0x2AC0, 0x2ABF, 0x2AC2, 0x2AC1, 0x2AC4, 0x2AC3, 0x2AC6, 0x2AC5,
120     0x2ACE, 0x2ACD, 0x2AD0, 0x2ACF, 0x2AD2, 0x2AD1, 0x2AD4, 0x2AD3, 0x2AD6,
121     0x2AD5, 0x22A6, 0x22A9, 0x22A8, 0x22AB, 0x2AED, 0x2AEC, 0x2AF8, 0x2AF7,
122     0x2AFA, 0x2AF9, 0x2E03, 0x2E02, 0x2E05, 0x2E04, 0x2E0A, 0x2E09, 0x2E0D,
123     0x2E0C, 0x2E1D, 0x2E1C, 0x2E21, 0x2E20, 0x2E23, 0x2E22, 0x2E25, 0x2E24,
124     0x2E27, 0x2E26, 0x2E29, 0x2E28, 0x3009, 0x3008, 0x300B, 0x300A, 0x300D,
125     0x300C, 0x300F, 0x300E, 0x3011, 0x3010, 0x3015, 0x3014, 0x3017, 0x3016,
126     0x3019, 0x3018, 0x301B, 0x301A, 0xFE5A, 0xFE59, 0xFE5C, 0xFE5B, 0xFE5E,
127     0xFE5D, 0xFE65, 0xFE64, 0xFF09, 0xFF08, 0xFF1E, 0xFF1C, 0xFF3D, 0xFF3B,
128     0xFF5D, 0xFF5B, 0xFF60, 0xFF5F, 0xFF63, 0xFF62,
129 };
130 
131 // Check that the mirror indices in the fx_ucddata.inc table are in bounds.
// The data file is included once more with CHARPROP____ defined to expand to a
// static_assert per invocation, so compilation fails if any `mirror` argument
// is neither kMirrorMax (the "no mirror" marker) nor a valid index into
// kFXTextLayoutBidiMirror[]. The `ct`, `bd` and `bt` arguments are ignored.
132 #undef CHARPROP____
133 #define CHARPROP____(mirror, ct, bd, bt)                                   \
134   static_assert(                                                           \
135       mirror == kMirrorMax || mirror < std::size(kFXTextLayoutBidiMirror), \
136       "Bad mirror index");
137 #include "core/fxcrt/fx_ucddata.inc"  // NOLINT(build/include)
138 #undef CHARPROP____
139 
140 }  // namespace
141 
142 namespace pdfium::unicode {
143 
GetMirrorChar(wchar_t wch)144 wchar_t GetMirrorChar(wchar_t wch) {
145   uint16_t prop = GetUnicodeProperties(wch);
146   size_t idx = prop >> kMirrorBitPos;
147   if (idx == kMirrorMax) {
148     return wch;
149   }
150   CHECK_LT(idx, std::size(kFXTextLayoutBidiMirror));
151   // SAFETY: `std::size(kFXTextLayoutBidiMirror)` is the size of the table, so
152   // the CHECK() above verifies `idx` is in range.
153   return UNSAFE_BUFFERS(kFXTextLayoutBidiMirror[idx]);
154 }
155 
GetBidiClass(wchar_t wch)156 FX_BIDICLASS GetBidiClass(wchar_t wch) {
157   uint16_t prop = GetUnicodeProperties(wch);
158   uint16_t result = (prop & kBidiClassBitMask) >> kBidiClassBitPos;
159   DCHECK(result <= static_cast<uint16_t>(FX_BIDICLASS::kPDF));
160   return static_cast<FX_BIDICLASS>(result);
161 }
162 
163 #ifdef PDF_ENABLE_XFA
GetCharType(wchar_t wch)164 FX_CHARTYPE GetCharType(wchar_t wch) {
165   uint16_t prop = GetExtendedUnicodeProperties(wch);
166   uint16_t result = (prop & kCharTypeBitMask) >> kCharTypeBitPos;
167   DCHECK(result <= static_cast<uint16_t>(FX_CHARTYPE::kArabic));
168   return static_cast<FX_CHARTYPE>(result);
169 }
170 
GetBreakProperty(wchar_t wch)171 FX_BREAKPROPERTY GetBreakProperty(wchar_t wch) {
172   uint16_t prop = GetExtendedUnicodeProperties(wch);
173   uint16_t result = (prop & kBreakTypeBitMask) >> kBreakTypeBitPos;
174   DCHECK(result <= static_cast<uint16_t>(FX_BREAKPROPERTY::kTB));
175   return static_cast<FX_BREAKPROPERTY>(result);
176 }
177 #endif  // PDF_ENABLE_XFA
178 
179 }  // namespace pdfium::unicode
180