1 // Copyright 2014 The PDFium Authors
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
6 
7 #include "core/fxcrt/fx_unicode.h"
8 
9 #include <stddef.h>
10 
11 #include <iterator>
12 
13 #include "third_party/base/check.h"
14 
15 namespace {
16 
// Bit layout of each uint16_t value in kTextLayoutCodeProperties[]:
//   bits [0, 5)  - bidi class (an FX_BIDICLASS value).
//   bits [5, 14) - mirror index; the all-ones value kMirrorMax is compared
//                  against as a sentinel rather than used as an index.
constexpr uint16_t kBidiClassBitPos = 0;
constexpr uint16_t kBidiClassBitCount = 5;
constexpr uint16_t kBidiClassBitMask = static_cast<uint16_t>(
    ((1u << kBidiClassBitCount) - 1u) << kBidiClassBitPos);

constexpr uint16_t kMirrorBitPos = 5;
constexpr uint16_t kMirrorBitCount = 9;
constexpr uint16_t kMirrorMax =
    static_cast<uint16_t>((1u << kMirrorBitCount) - 1u);
26 
// Builds kTextLayoutCodeProperties[] by including fx_ucddata.inc with
// CHARPROP____ defined so that each CHARPROP____(mirror, ct, bd, bt) entry
// expands to one packed uint16_t initializer: |mirror| shifted up to
// kMirrorBitPos, OR-ed with the FX_BIDICLASS enumerator named by |bd| at
// kBidiClassBitPos. The |ct| and |bt| arguments are not used by this
// expansion.
#undef CHARPROP____
#define CHARPROP____(mirror, ct, bd, bt) \
  ((mirror << kMirrorBitPos) |           \
   (static_cast<uint16_t>(FX_BIDICLASS::bd) << kBidiClassBitPos)),
constexpr uint16_t kTextLayoutCodeProperties[] = {
#include "core/fxcrt/fx_ucddata.inc"  // NOLINT(build/include)
};
#undef CHARPROP____

// Number of entries in kTextLayoutCodeProperties[].
constexpr size_t kTextLayoutCodePropertiesSize =
    std::size(kTextLayoutCodeProperties);

// The table is indexed directly by character value, so it must contain
// exactly one entry for each of the 65536 values 0x0000-0xFFFF.
static_assert(kTextLayoutCodePropertiesSize == 65536, "missing characters");
40 
GetUnicodeProperties(wchar_t wch)41 uint16_t GetUnicodeProperties(wchar_t wch) {
42   size_t idx = static_cast<size_t>(wch);
43   if (idx < kTextLayoutCodePropertiesSize)
44     return kTextLayoutCodeProperties[idx];
45   return 0;
46 }
47 
48 #ifdef PDF_ENABLE_XFA
// Bit layout of each uint16_t value in kExtendedTextLayoutCodeProperties[]:
//   bits [0, 6)  - break type (an FX_BREAKPROPERTY value).
//   bits [6, 10) - character type (an FX_CHARTYPE value).
constexpr uint16_t kBreakTypeBitPos = 0;
constexpr uint16_t kBreakTypeBitCount = 6;
constexpr uint16_t kBreakTypeBitMask = static_cast<uint16_t>(
    ((1u << kBreakTypeBitCount) - 1u) << kBreakTypeBitPos);

constexpr uint16_t kCharTypeBitPos = 6;
constexpr uint16_t kCharTypeBitCount = 4;
constexpr uint16_t kCharTypeBitMask = static_cast<uint16_t>(
    ((1u << kCharTypeBitCount) - 1u) << kCharTypeBitPos);
59 
// Builds kExtendedTextLayoutCodeProperties[] by including fx_ucddata.inc a
// second time, now with CHARPROP____ expanding each
// CHARPROP____(mirror, ct, bd, bt) entry to a packed uint16_t holding the
// FX_CHARTYPE enumerator named by |ct| at kCharTypeBitPos and the
// FX_BREAKPROPERTY enumerator named by |bt| at kBreakTypeBitPos. The |mirror|
// and |bd| arguments are not used by this expansion.
#undef CHARPROP____
#define CHARPROP____(mirror, ct, bd, bt)                         \
  ((static_cast<uint16_t>(FX_CHARTYPE::ct) << kCharTypeBitPos) | \
   (static_cast<uint16_t>(FX_BREAKPROPERTY::bt) << kBreakTypeBitPos)),
constexpr uint16_t kExtendedTextLayoutCodeProperties[] = {
#include "core/fxcrt/fx_ucddata.inc"  // NOLINT(build/include)
};
#undef CHARPROP____

// Number of entries in kExtendedTextLayoutCodeProperties[].
constexpr size_t kExtendedTextLayoutCodePropertiesSize =
    std::size(kExtendedTextLayoutCodeProperties);

// The table is indexed directly by character value, so it must contain
// exactly one entry for each of the 65536 values 0x0000-0xFFFF.
static_assert(kExtendedTextLayoutCodePropertiesSize == 65536,
              "missing characters");
74 
GetExtendedUnicodeProperties(wchar_t wch)75 uint16_t GetExtendedUnicodeProperties(wchar_t wch) {
76   size_t idx = static_cast<size_t>(wch);
77   if (idx < kExtendedTextLayoutCodePropertiesSize)
78     return kExtendedTextLayoutCodeProperties[idx];
79   return 0;
80 }
81 
82 #endif  // PDF_ENABLE_XFA
83 
// Table of mirrored characters. GetMirrorChar() indexes it with the mirror
// index stored in a character's kTextLayoutCodeProperties[] entry and returns
// the value found here. Entry order is significant: it must match the mirror
// indices used in fx_ucddata.inc, so do not reorder or insert entries without
// regenerating that data.
constexpr uint16_t kFXTextLayoutBidiMirror[] = {
    0x0029, 0x0028, 0x003E, 0x003C, 0x005D, 0x005B, 0x007D, 0x007B, 0x00BB,
    0x00AB, 0x0F3B, 0x0F3A, 0x0F3D, 0x0F3C, 0x169C, 0x169B, 0x2019, 0x2018,
    0x201D, 0x201C, 0x203A, 0x2039, 0x2046, 0x2045, 0x207E, 0x207D, 0x208E,
    0x208D, 0x220B, 0x220C, 0x220D, 0x2208, 0x2209, 0x220A, 0x29F5, 0x223D,
    0x223C, 0x22CD, 0x2253, 0x2252, 0x2255, 0x2254, 0x2265, 0x2264, 0x2267,
    0x2266, 0x2269, 0x2268, 0x226B, 0x226A, 0x226F, 0x226E, 0x2271, 0x2270,
    0x2273, 0x2272, 0x2275, 0x2274, 0x2277, 0x2276, 0x2279, 0x2278, 0x227B,
    0x227A, 0x227D, 0x227C, 0x227F, 0x227E, 0x2281, 0x2280, 0x2283, 0x2282,
    0x2285, 0x2284, 0x2287, 0x2286, 0x2289, 0x2288, 0x228B, 0x228A, 0x2290,
    0x228F, 0x2292, 0x2291, 0x29B8, 0x22A3, 0x22A2, 0x2ADE, 0x2AE4, 0x2AE3,
    0x2AE5, 0x22B1, 0x22B0, 0x22B3, 0x22B2, 0x22B5, 0x22B4, 0x22B7, 0x22B6,
    0x22CA, 0x22C9, 0x22CC, 0x22CB, 0x2243, 0x22D1, 0x22D0, 0x22D7, 0x22D6,
    0x22D9, 0x22D8, 0x22DB, 0x22DA, 0x22DD, 0x22DC, 0x22DF, 0x22DE, 0x22E1,
    0x22E0, 0x22E3, 0x22E2, 0x22E5, 0x22E4, 0x22E7, 0x22E6, 0x22E9, 0x22E8,
    0x22EB, 0x22EA, 0x22ED, 0x22EC, 0x22F1, 0x22F0, 0x22FA, 0x22FB, 0x22FC,
    0x22FD, 0x22FE, 0x22F2, 0x22F3, 0x22F4, 0x22F6, 0x22F7, 0x2309, 0x2308,
    0x230B, 0x230A, 0x232A, 0x2329, 0x2769, 0x2768, 0x276B, 0x276A, 0x276D,
    0x276C, 0x276F, 0x276E, 0x2771, 0x2770, 0x2773, 0x2772, 0x2775, 0x2774,
    0x27C4, 0x27C3, 0x27C6, 0x27C5, 0x27C9, 0x27C8, 0x27D6, 0x27D5, 0x27DE,
    0x27DD, 0x27E3, 0x27E2, 0x27E5, 0x27E4, 0x27E7, 0x27E6, 0x27E9, 0x27E8,
    0x27EB, 0x27EA, 0x27ED, 0x27EC, 0x27EF, 0x27EE, 0x2984, 0x2983, 0x2986,
    0x2985, 0x2988, 0x2987, 0x298A, 0x2989, 0x298C, 0x298B, 0x2990, 0x298F,
    0x298E, 0x298D, 0x2992, 0x2991, 0x2994, 0x2993, 0x2996, 0x2995, 0x2998,
    0x2997, 0x2298, 0x29C1, 0x29C0, 0x29C5, 0x29C4, 0x29D0, 0x29CF, 0x29D2,
    0x29D1, 0x29D5, 0x29D4, 0x29D9, 0x29D8, 0x29DB, 0x29DA, 0x2215, 0x29F9,
    0x29F8, 0x29FD, 0x29FC, 0x2A2C, 0x2A2B, 0x2A2E, 0x2A2D, 0x2A35, 0x2A34,
    0x2A3D, 0x2A3C, 0x2A65, 0x2A64, 0x2A7A, 0x2A79, 0x2A7E, 0x2A7D, 0x2A80,
    0x2A7F, 0x2A82, 0x2A81, 0x2A84, 0x2A83, 0x2A8C, 0x2A8B, 0x2A92, 0x2A91,
    0x2A94, 0x2A93, 0x2A96, 0x2A95, 0x2A98, 0x2A97, 0x2A9A, 0x2A99, 0x2A9C,
    0x2A9B, 0x2AA2, 0x2AA1, 0x2AA7, 0x2AA6, 0x2AA9, 0x2AA8, 0x2AAB, 0x2AAA,
    0x2AAD, 0x2AAC, 0x2AB0, 0x2AAF, 0x2AB4, 0x2AB3, 0x2ABC, 0x2ABB, 0x2ABE,
    0x2ABD, 0x2AC0, 0x2ABF, 0x2AC2, 0x2AC1, 0x2AC4, 0x2AC3, 0x2AC6, 0x2AC5,
    0x2ACE, 0x2ACD, 0x2AD0, 0x2ACF, 0x2AD2, 0x2AD1, 0x2AD4, 0x2AD3, 0x2AD6,
    0x2AD5, 0x22A6, 0x22A9, 0x22A8, 0x22AB, 0x2AED, 0x2AEC, 0x2AF8, 0x2AF7,
    0x2AFA, 0x2AF9, 0x2E03, 0x2E02, 0x2E05, 0x2E04, 0x2E0A, 0x2E09, 0x2E0D,
    0x2E0C, 0x2E1D, 0x2E1C, 0x2E21, 0x2E20, 0x2E23, 0x2E22, 0x2E25, 0x2E24,
    0x2E27, 0x2E26, 0x2E29, 0x2E28, 0x3009, 0x3008, 0x300B, 0x300A, 0x300D,
    0x300C, 0x300F, 0x300E, 0x3011, 0x3010, 0x3015, 0x3014, 0x3017, 0x3016,
    0x3019, 0x3018, 0x301B, 0x301A, 0xFE5A, 0xFE59, 0xFE5C, 0xFE5B, 0xFE5E,
    0xFE5D, 0xFE65, 0xFE64, 0xFF09, 0xFF08, 0xFF1E, 0xFF1C, 0xFF3D, 0xFF3B,
    0xFF5D, 0xFF5B, 0xFF60, 0xFF5F, 0xFF63, 0xFF62,
};

// Number of entries in kFXTextLayoutBidiMirror[]; valid mirror indices are
// strictly below this value.
constexpr size_t kFXTextLayoutBidiMirrorSize =
    std::size(kFXTextLayoutBidiMirror);
130 
// Check that the mirror indices in the fx_ucddata.inc table are in bounds.
// The data file is included once more with CHARPROP____ expanding to a
// static_assert, so each entry's |mirror| argument is verified at compile time
// to be either kMirrorMax (the value GetMirrorChar() treats as "no mirror") or
// a valid index into kFXTextLayoutBidiMirror[]. This is what allows
// GetMirrorChar() to index the table with only a DCHECK at run time.
#undef CHARPROP____
#define CHARPROP____(mirror, ct, bd, bt)                                      \
  static_assert(mirror == kMirrorMax || mirror < kFXTextLayoutBidiMirrorSize, \
                "Bad mirror index");
#include "core/fxcrt/fx_ucddata.inc"  // NOLINT(build/include)
#undef CHARPROP____
138 
139 }  // namespace
140 
141 namespace pdfium {
142 namespace unicode {
143 
GetMirrorChar(wchar_t wch)144 wchar_t GetMirrorChar(wchar_t wch) {
145   uint16_t prop = GetUnicodeProperties(wch);
146   size_t idx = prop >> kMirrorBitPos;
147   if (idx == kMirrorMax)
148     return wch;
149   DCHECK(idx < kFXTextLayoutBidiMirrorSize);
150   return kFXTextLayoutBidiMirror[idx];
151 }
152 
GetBidiClass(wchar_t wch)153 FX_BIDICLASS GetBidiClass(wchar_t wch) {
154   uint16_t prop = GetUnicodeProperties(wch);
155   uint16_t result = (prop & kBidiClassBitMask) >> kBidiClassBitPos;
156   DCHECK(result <= static_cast<uint16_t>(FX_BIDICLASS::kPDF));
157   return static_cast<FX_BIDICLASS>(result);
158 }
159 
160 #ifdef PDF_ENABLE_XFA
GetCharType(wchar_t wch)161 FX_CHARTYPE GetCharType(wchar_t wch) {
162   uint16_t prop = GetExtendedUnicodeProperties(wch);
163   uint16_t result = (prop & kCharTypeBitMask) >> kCharTypeBitPos;
164   DCHECK(result <= static_cast<uint16_t>(FX_CHARTYPE::kArabic));
165   return static_cast<FX_CHARTYPE>(result);
166 }
167 
GetBreakProperty(wchar_t wch)168 FX_BREAKPROPERTY GetBreakProperty(wchar_t wch) {
169   uint16_t prop = GetExtendedUnicodeProperties(wch);
170   uint16_t result = (prop & kBreakTypeBitMask) >> kBreakTypeBitPos;
171   DCHECK(result <= static_cast<uint16_t>(FX_BREAKPROPERTY::kTB));
172   return static_cast<FX_BREAKPROPERTY>(result);
173 }
174 #endif  // PDF_ENABLE_XFA
175 
176 }  // namespace unicode
177 }  // namespace pdfium
178