1 // Copyright 2014 PDFium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
6
7 #include "core/fxcrt/fx_bidi.h"
8
9 #include <algorithm>
10
11 #include "core/fxcrt/fx_unicode.h"
12 #include "third_party/base/ptr_util.h"
13
14 #ifdef PDF_ENABLE_XFA
15 #include "core/fxcrt/fx_extension.h"
16 #endif // PDF_ENABLE_XFA
17
18 namespace {
19
// Bidirectional character classes. The numeric values are the ones stored in
// the bidi-class bit field of a character's Unicode properties (extracted with
// FX_BIDICLASSBITSMASK / FX_BIDICLASSBITS), so they must not be renumbered.
enum FX_BIDICLASS {
  // Other Neutral.
  FX_BIDICLASS_ON = 0,
  // Left Letter.
  FX_BIDICLASS_L = 1,
  // Right Letter.
  FX_BIDICLASS_R = 2,
  // Arabic Number.
  FX_BIDICLASS_AN = 3,
  // European Number.
  FX_BIDICLASS_EN = 4,
  // Arabic Letter.
  FX_BIDICLASS_AL = 5,
  // Non-spacing Mark.
  FX_BIDICLASS_NSM = 6,
  // Common Number Separator.
  FX_BIDICLASS_CS = 7,
  // European Separator.
  FX_BIDICLASS_ES = 8,
  // European Number Terminator.
  FX_BIDICLASS_ET = 9,
  // Boundary Neutral.
  FX_BIDICLASS_BN = 10,
  // Segment Separator.
  FX_BIDICLASS_S = 11,
  // Whitespace.
  FX_BIDICLASS_WS = 12,
  // Paragraph Separator.
  FX_BIDICLASS_B = 13,
  // Right-to-Left Override.
  FX_BIDICLASS_RLO = 14,
  // Right-to-Left Embedding.
  FX_BIDICLASS_RLE = 15,
  // Left-to-Right Override.
  FX_BIDICLASS_LRO = 16,
  // Left-to-Right Embedding.
  FX_BIDICLASS_LRE = 17,
  // Pop Directional Format.
  FX_BIDICLASS_PDF = 18,
  // Generic "neutral" alias used by the resolution tables.
  FX_BIDICLASS_N = FX_BIDICLASS_ON,
};
// Position (shift) of the bidi-class field inside a character's Unicode
// property word.
constexpr uint32_t FX_BIDICLASSBITS = 6;

// Mask selecting the 5-bit bidi-class field before it is shifted down.
constexpr uint32_t FX_BIDICLASSBITSMASK = uint32_t{0x1F} << FX_BIDICLASSBITS;
44
45 #ifdef PDF_ENABLE_XFA
46
#ifndef NDEBUG
// Upper bound on the embedding level accepted by ReorderLevel(). It is only
// referenced from an ASSERT, hence it is only defined for debug builds.
constexpr int32_t kBidiMaxLevel = 61;
#endif  // NDEBUG
50
// States of the weak-type resolution state machine. Each value is the row
// index into gc_FX_BidiWeakStates and gc_FX_BidiWeakActions, so the numbering
// must stay in step with the row order of those tables.
enum FX_BIDIWEAKSTATE {
  FX_BWSxa = 0,
  FX_BWSxr = 1,
  FX_BWSxl = 2,
  FX_BWSao = 3,
  FX_BWSro = 4,
  FX_BWSlo = 5,
  FX_BWSrt = 6,
  FX_BWSlt = 7,
  FX_BWScn = 8,
  FX_BWSra = 9,
  FX_BWSre = 10,
  FX_BWSla = 11,
  FX_BWSle = 12,
  FX_BWSac = 13,
  FX_BWSrc = 14,
  FX_BWSrs = 15,
  FX_BWSlc = 16,
  FX_BWSls = 17,
  FX_BWSret = 18,
  FX_BWSlet = 19,
};
73
74 enum FX_BIDIWEAKACTION {
75 FX_BWAIX = 0x100,
76 FX_BWAXX = 0x0F,
77 FX_BWAxxx = (0x0F << 4) + 0x0F,
78 FX_BWAxIx = 0x100 + FX_BWAxxx,
79 FX_BWAxxN = (0x0F << 4) + FX_BIDICLASS_ON,
80 FX_BWAxxE = (0x0F << 4) + FX_BIDICLASS_EN,
81 FX_BWAxxA = (0x0F << 4) + FX_BIDICLASS_AN,
82 FX_BWAxxR = (0x0F << 4) + FX_BIDICLASS_R,
83 FX_BWAxxL = (0x0F << 4) + FX_BIDICLASS_L,
84 FX_BWANxx = (FX_BIDICLASS_ON << 4) + 0x0F,
85 FX_BWAAxx = (FX_BIDICLASS_AN << 4) + 0x0F,
86 FX_BWAExE = (FX_BIDICLASS_EN << 4) + FX_BIDICLASS_EN,
87 FX_BWANIx = (FX_BIDICLASS_ON << 4) + 0x0F + 0x100,
88 FX_BWANxN = (FX_BIDICLASS_ON << 4) + FX_BIDICLASS_ON,
89 FX_BWANxR = (FX_BIDICLASS_ON << 4) + FX_BIDICLASS_R,
90 FX_BWANxE = (FX_BIDICLASS_ON << 4) + FX_BIDICLASS_EN,
91 FX_BWAAxA = (FX_BIDICLASS_AN << 4) + FX_BIDICLASS_AN,
92 FX_BWANxL = (FX_BIDICLASS_ON << 4) + FX_BIDICLASS_L,
93 FX_BWALxL = (FX_BIDICLASS_L << 4) + FX_BIDICLASS_L,
94 FX_BWAxIL = (0x0F << 4) + FX_BIDICLASS_L + 0x100,
95 FX_BWAAxR = (FX_BIDICLASS_AN << 4) + FX_BIDICLASS_R,
96 FX_BWALxx = (FX_BIDICLASS_L << 4) + 0x0F,
97 };
98
// States of the neutral-type resolution state machine. Each value is the row
// index into gc_FX_BidiNeutralStates and gc_FX_BidiNeutralActions.
enum FX_BIDINEUTRALSTATE {
  FX_BNSr = 0,
  FX_BNSl = 1,
  FX_BNSrn = 2,
  FX_BNSln = 3,
  FX_BNSa = 4,
  FX_BNSna = 5,
};
107
108 enum FX_BIDINEUTRALACTION {
109 FX_BNAnL = FX_BIDICLASS_L,
110 FX_BNAEn = (FX_BIDICLASS_AN << 4),
111 FX_BNARn = (FX_BIDICLASS_R << 4),
112 FX_BNALn = (FX_BIDICLASS_L << 4),
113 FX_BNAIn = FX_BWAIX,
114 FX_BNALnL = (FX_BIDICLASS_L << 4) + FX_BIDICLASS_L,
115 };
116
117 const int32_t gc_FX_BidiNTypes[] = {
118 FX_BIDICLASS_N, FX_BIDICLASS_L, FX_BIDICLASS_R, FX_BIDICLASS_AN,
119 FX_BIDICLASS_EN, FX_BIDICLASS_AL, FX_BIDICLASS_NSM, FX_BIDICLASS_CS,
120 FX_BIDICLASS_ES, FX_BIDICLASS_ET, FX_BIDICLASS_BN, FX_BIDICLASS_BN,
121 FX_BIDICLASS_N, FX_BIDICLASS_B, FX_BIDICLASS_RLO, FX_BIDICLASS_RLE,
122 FX_BIDICLASS_LRO, FX_BIDICLASS_LRE, FX_BIDICLASS_PDF, FX_BIDICLASS_ON,
123 };
124
125 const int32_t gc_FX_BidiWeakStates[][10] = {
126 {FX_BWSao, FX_BWSxl, FX_BWSxr, FX_BWScn, FX_BWScn, FX_BWSxa, FX_BWSxa,
127 FX_BWSao, FX_BWSao, FX_BWSao},
128 {FX_BWSro, FX_BWSxl, FX_BWSxr, FX_BWSra, FX_BWSre, FX_BWSxa, FX_BWSxr,
129 FX_BWSro, FX_BWSro, FX_BWSrt},
130 {FX_BWSlo, FX_BWSxl, FX_BWSxr, FX_BWSla, FX_BWSle, FX_BWSxa, FX_BWSxl,
131 FX_BWSlo, FX_BWSlo, FX_BWSlt},
132 {FX_BWSao, FX_BWSxl, FX_BWSxr, FX_BWScn, FX_BWScn, FX_BWSxa, FX_BWSao,
133 FX_BWSao, FX_BWSao, FX_BWSao},
134 {FX_BWSro, FX_BWSxl, FX_BWSxr, FX_BWSra, FX_BWSre, FX_BWSxa, FX_BWSro,
135 FX_BWSro, FX_BWSro, FX_BWSrt},
136 {FX_BWSlo, FX_BWSxl, FX_BWSxr, FX_BWSla, FX_BWSle, FX_BWSxa, FX_BWSlo,
137 FX_BWSlo, FX_BWSlo, FX_BWSlt},
138 {FX_BWSro, FX_BWSxl, FX_BWSxr, FX_BWSra, FX_BWSre, FX_BWSxa, FX_BWSrt,
139 FX_BWSro, FX_BWSro, FX_BWSrt},
140 {FX_BWSlo, FX_BWSxl, FX_BWSxr, FX_BWSla, FX_BWSle, FX_BWSxa, FX_BWSlt,
141 FX_BWSlo, FX_BWSlo, FX_BWSlt},
142 {FX_BWSao, FX_BWSxl, FX_BWSxr, FX_BWScn, FX_BWScn, FX_BWSxa, FX_BWScn,
143 FX_BWSac, FX_BWSao, FX_BWSao},
144 {FX_BWSro, FX_BWSxl, FX_BWSxr, FX_BWSra, FX_BWSre, FX_BWSxa, FX_BWSra,
145 FX_BWSrc, FX_BWSro, FX_BWSrt},
146 {FX_BWSro, FX_BWSxl, FX_BWSxr, FX_BWSra, FX_BWSre, FX_BWSxa, FX_BWSre,
147 FX_BWSrs, FX_BWSrs, FX_BWSret},
148 {FX_BWSlo, FX_BWSxl, FX_BWSxr, FX_BWSla, FX_BWSle, FX_BWSxa, FX_BWSla,
149 FX_BWSlc, FX_BWSlo, FX_BWSlt},
150 {FX_BWSlo, FX_BWSxl, FX_BWSxr, FX_BWSla, FX_BWSle, FX_BWSxa, FX_BWSle,
151 FX_BWSls, FX_BWSls, FX_BWSlet},
152 {FX_BWSao, FX_BWSxl, FX_BWSxr, FX_BWScn, FX_BWScn, FX_BWSxa, FX_BWSao,
153 FX_BWSao, FX_BWSao, FX_BWSao},
154 {FX_BWSro, FX_BWSxl, FX_BWSxr, FX_BWSra, FX_BWSre, FX_BWSxa, FX_BWSro,
155 FX_BWSro, FX_BWSro, FX_BWSrt},
156 {FX_BWSro, FX_BWSxl, FX_BWSxr, FX_BWSra, FX_BWSre, FX_BWSxa, FX_BWSro,
157 FX_BWSro, FX_BWSro, FX_BWSrt},
158 {FX_BWSlo, FX_BWSxl, FX_BWSxr, FX_BWSla, FX_BWSle, FX_BWSxa, FX_BWSlo,
159 FX_BWSlo, FX_BWSlo, FX_BWSlt},
160 {FX_BWSlo, FX_BWSxl, FX_BWSxr, FX_BWSla, FX_BWSle, FX_BWSxa, FX_BWSlo,
161 FX_BWSlo, FX_BWSlo, FX_BWSlt},
162 {FX_BWSro, FX_BWSxl, FX_BWSxr, FX_BWSra, FX_BWSre, FX_BWSxa, FX_BWSret,
163 FX_BWSro, FX_BWSro, FX_BWSret},
164 {FX_BWSlo, FX_BWSxl, FX_BWSxr, FX_BWSla, FX_BWSle, FX_BWSxa, FX_BWSlet,
165 FX_BWSlo, FX_BWSlo, FX_BWSlet},
166 };
167
168 const int32_t gc_FX_BidiWeakActions[][10] = {
169 {FX_BWAxxx, FX_BWAxxx, FX_BWAxxx, FX_BWAxxx, FX_BWAxxA, FX_BWAxxR,
170 FX_BWAxxR, FX_BWAxxN, FX_BWAxxN, FX_BWAxxN},
171 {FX_BWAxxx, FX_BWAxxx, FX_BWAxxx, FX_BWAxxx, FX_BWAxxE, FX_BWAxxR,
172 FX_BWAxxR, FX_BWAxxN, FX_BWAxxN, FX_BWAxIx},
173 {FX_BWAxxx, FX_BWAxxx, FX_BWAxxx, FX_BWAxxx, FX_BWAxxL, FX_BWAxxR,
174 FX_BWAxxL, FX_BWAxxN, FX_BWAxxN, FX_BWAxIx},
175 {FX_BWAxxx, FX_BWAxxx, FX_BWAxxx, FX_BWAxxx, FX_BWAxxA, FX_BWAxxR,
176 FX_BWAxxN, FX_BWAxxN, FX_BWAxxN, FX_BWAxxN},
177 {FX_BWAxxx, FX_BWAxxx, FX_BWAxxx, FX_BWAxxx, FX_BWAxxE, FX_BWAxxR,
178 FX_BWAxxN, FX_BWAxxN, FX_BWAxxN, FX_BWAxIx},
179 {FX_BWAxxx, FX_BWAxxx, FX_BWAxxx, FX_BWAxxx, FX_BWAxxL, FX_BWAxxR,
180 FX_BWAxxN, FX_BWAxxN, FX_BWAxxN, FX_BWAxIx},
181 {FX_BWANxx, FX_BWANxx, FX_BWANxx, FX_BWANxx, FX_BWAExE, FX_BWANxR,
182 FX_BWAxIx, FX_BWANxN, FX_BWANxN, FX_BWAxIx},
183 {FX_BWANxx, FX_BWANxx, FX_BWANxx, FX_BWANxx, FX_BWALxL, FX_BWANxR,
184 FX_BWAxIx, FX_BWANxN, FX_BWANxN, FX_BWAxIx},
185 {FX_BWAxxx, FX_BWAxxx, FX_BWAxxx, FX_BWAxxx, FX_BWAxxA, FX_BWAxxR,
186 FX_BWAxxA, FX_BWAxIx, FX_BWAxxN, FX_BWAxxN},
187 {FX_BWAxxx, FX_BWAxxx, FX_BWAxxx, FX_BWAxxx, FX_BWAxxE, FX_BWAxxR,
188 FX_BWAxxA, FX_BWAxIx, FX_BWAxxN, FX_BWAxIx},
189 {FX_BWAxxx, FX_BWAxxx, FX_BWAxxx, FX_BWAxxx, FX_BWAxxE, FX_BWAxxR,
190 FX_BWAxxE, FX_BWAxIx, FX_BWAxIx, FX_BWAxxE},
191 {FX_BWAxxx, FX_BWAxxx, FX_BWAxxx, FX_BWAxxx, FX_BWAxxL, FX_BWAxxR,
192 FX_BWAxxA, FX_BWAxIx, FX_BWAxxN, FX_BWAxIx},
193 {FX_BWAxxx, FX_BWAxxx, FX_BWAxxx, FX_BWAxxx, FX_BWAxxL, FX_BWAxxR,
194 FX_BWAxxL, FX_BWAxIx, FX_BWAxIx, FX_BWAxxL},
195 {FX_BWANxx, FX_BWANxx, FX_BWANxx, FX_BWAAxx, FX_BWAAxA, FX_BWANxR,
196 FX_BWANxN, FX_BWANxN, FX_BWANxN, FX_BWANxN},
197 {FX_BWANxx, FX_BWANxx, FX_BWANxx, FX_BWAAxx, FX_BWANxE, FX_BWANxR,
198 FX_BWANxN, FX_BWANxN, FX_BWANxN, FX_BWANIx},
199 {FX_BWANxx, FX_BWANxx, FX_BWANxx, FX_BWANxx, FX_BWAExE, FX_BWANxR,
200 FX_BWANxN, FX_BWANxN, FX_BWANxN, FX_BWANIx},
201 {FX_BWANxx, FX_BWANxx, FX_BWANxx, FX_BWAAxx, FX_BWANxL, FX_BWANxR,
202 FX_BWANxN, FX_BWANxN, FX_BWANxN, FX_BWANIx},
203 {FX_BWANxx, FX_BWANxx, FX_BWANxx, FX_BWANxx, FX_BWALxL, FX_BWANxR,
204 FX_BWANxN, FX_BWANxN, FX_BWANxN, FX_BWANIx},
205 {FX_BWAxxx, FX_BWAxxx, FX_BWAxxx, FX_BWAxxx, FX_BWAxxE, FX_BWAxxR,
206 FX_BWAxxE, FX_BWAxxN, FX_BWAxxN, FX_BWAxxE},
207 {FX_BWAxxx, FX_BWAxxx, FX_BWAxxx, FX_BWAxxx, FX_BWAxxL, FX_BWAxxR,
208 FX_BWAxxL, FX_BWAxxN, FX_BWAxxN, FX_BWAxxL},
209 };
210
211 const int32_t gc_FX_BidiNeutralStates[][5] = {
212 {FX_BNSrn, FX_BNSl, FX_BNSr, FX_BNSr, FX_BNSr},
213 {FX_BNSln, FX_BNSl, FX_BNSr, FX_BNSa, FX_BNSl},
214 {FX_BNSrn, FX_BNSl, FX_BNSr, FX_BNSr, FX_BNSr},
215 {FX_BNSln, FX_BNSl, FX_BNSr, FX_BNSa, FX_BNSl},
216 {FX_BNSna, FX_BNSl, FX_BNSr, FX_BNSa, FX_BNSl},
217 {FX_BNSna, FX_BNSl, FX_BNSr, FX_BNSa, FX_BNSl},
218 };
219 const int32_t gc_FX_BidiNeutralActions[][5] = {
220 {FX_BNAIn, 0, 0, 0, 0},
221 {FX_BNAIn, 0, 0, 0, FX_BIDICLASS_L},
222 {FX_BNAIn, FX_BNAEn, FX_BNARn, FX_BNARn, FX_BNARn},
223 {FX_BNAIn, FX_BNALn, FX_BNAEn, FX_BNAEn, FX_BNALnL},
224 {FX_BNAIn, 0, 0, 0, FX_BIDICLASS_L},
225 {FX_BNAIn, FX_BNAEn, FX_BNARn, FX_BNARn, FX_BNAEn},
226 };
227
// Amount added to a character's embedding level by ResolveImplicit(), indexed
// as [level is odd][class - 1]. Columns are the classes L, R, AN, EN.
const int32_t gc_FX_BidiAddLevel[][4] = {
    /* even level */ {0, 1, 2, 2},
    /* odd level  */ {1, 0, 1, 1},
};
232
233 class CFX_BidiLine {
234 public:
BidiLine(std::vector<CFX_Char> * chars,size_t iCount)235 void BidiLine(std::vector<CFX_Char>* chars, size_t iCount) {
236 ASSERT(iCount <= chars->size());
237 if (iCount < 2)
238 return;
239
240 Classify(chars, iCount, false);
241 ResolveExplicit(chars, iCount);
242 ResolveWeak(chars, iCount);
243 ResolveNeutrals(chars, iCount);
244 ResolveImplicit(chars, iCount);
245 Classify(chars, iCount, true);
246 ResolveWhitespace(chars, iCount);
247 Reorder(chars, iCount);
248 Position(chars, iCount);
249 }
250
251 private:
Direction(int32_t val)252 int32_t Direction(int32_t val) {
253 return FX_IsOdd(val) ? FX_BIDICLASS_R : FX_BIDICLASS_L;
254 }
255
GetDeferredType(int32_t val)256 int32_t GetDeferredType(int32_t val) { return (val >> 4) & 0x0F; }
257
GetResolvedType(int32_t val)258 int32_t GetResolvedType(int32_t val) { return val & 0x0F; }
259
GetDeferredNeutrals(int32_t iAction,int32_t iLevel)260 int32_t GetDeferredNeutrals(int32_t iAction, int32_t iLevel) {
261 iAction = (iAction >> 4) & 0xF;
262 if (iAction == (FX_BNAEn >> 4))
263 return Direction(iLevel);
264 return iAction;
265 }
266
GetResolvedNeutrals(int32_t iAction)267 int32_t GetResolvedNeutrals(int32_t iAction) {
268 iAction &= 0xF;
269 return iAction == FX_BNAIn ? 0 : iAction;
270 }
271
ReverseString(std::vector<CFX_Char> * chars,size_t iStart,size_t iCount)272 void ReverseString(std::vector<CFX_Char>* chars,
273 size_t iStart,
274 size_t iCount) {
275 ASSERT(pdfium::IndexInBounds(*chars, iStart));
276 ASSERT(iStart + iCount <= chars->size());
277
278 std::reverse(chars->begin() + iStart, chars->begin() + iStart + iCount);
279 }
280
SetDeferredRun(std::vector<CFX_Char> * chars,bool bClass,size_t iStart,size_t iCount,int32_t iValue)281 void SetDeferredRun(std::vector<CFX_Char>* chars,
282 bool bClass,
283 size_t iStart,
284 size_t iCount,
285 int32_t iValue) {
286 ASSERT(iStart <= chars->size());
287 ASSERT(iStart >= iCount);
288
289 size_t iLast = iStart - iCount;
290 for (size_t i = iStart - 1; i >= iLast; --i) {
291 if (bClass)
292 (*chars)[i].m_iBidiClass = static_cast<int16_t>(iValue);
293 else
294 (*chars)[i].m_iBidiLevel = static_cast<int16_t>(iValue);
295
296 if (i == 0)
297 break;
298 }
299 }
300
Classify(std::vector<CFX_Char> * chars,size_t iCount,bool bWS)301 void Classify(std::vector<CFX_Char>* chars, size_t iCount, bool bWS) {
302 if (bWS) {
303 for (size_t i = 0; i < iCount; ++i) {
304 CFX_Char& cur = (*chars)[i];
305 cur.m_iBidiClass =
306 static_cast<int16_t>(cur.char_props() & FX_BIDICLASSBITSMASK) >>
307 FX_BIDICLASSBITS;
308 }
309 return;
310 }
311
312 for (size_t i = 0; i < iCount; ++i) {
313 CFX_Char& cur = (*chars)[i];
314 cur.m_iBidiClass = static_cast<int16_t>(
315 gc_FX_BidiNTypes[(cur.char_props() & FX_BIDICLASSBITSMASK) >>
316 FX_BIDICLASSBITS]);
317 }
318 }
319
ResolveExplicit(std::vector<CFX_Char> * chars,size_t iCount)320 void ResolveExplicit(std::vector<CFX_Char>* chars, size_t iCount) {
321 for (size_t i = 0; i < iCount; ++i)
322 (*chars)[i].m_iBidiLevel = 0;
323 }
324
ResolveWeak(std::vector<CFX_Char> * chars,size_t iCount)325 void ResolveWeak(std::vector<CFX_Char>* chars, size_t iCount) {
326 if (iCount <= 1)
327 return;
328 --iCount;
329
330 int32_t iLevelCur = 0;
331 int32_t iState = FX_BWSxl;
332 size_t i = 0;
333 size_t iNum = 0;
334 int32_t iClsCur;
335 int32_t iClsRun;
336 int32_t iClsNew;
337 int32_t iAction;
338 for (; i <= iCount; ++i) {
339 CFX_Char* pTC = &(*chars)[i];
340 iClsCur = pTC->m_iBidiClass;
341 if (iClsCur == FX_BIDICLASS_BN) {
342 pTC->m_iBidiLevel = (int16_t)iLevelCur;
343 if (i == iCount && iLevelCur != 0) {
344 iClsCur = Direction(iLevelCur);
345 pTC->m_iBidiClass = (int16_t)iClsCur;
346 } else if (i < iCount) {
347 CFX_Char* pTCNext = &(*chars)[i + 1];
348 int32_t iLevelNext, iLevelNew;
349 iClsNew = pTCNext->m_iBidiClass;
350 iLevelNext = pTCNext->m_iBidiLevel;
351 if (iClsNew != FX_BIDICLASS_BN && iLevelCur != iLevelNext) {
352 iLevelNew = std::max(iLevelNext, iLevelCur);
353 pTC->m_iBidiLevel = static_cast<int16_t>(iLevelNew);
354 iClsCur = Direction(iLevelNew);
355 pTC->m_iBidiClass = static_cast<int16_t>(iClsCur);
356 iLevelCur = iLevelNext;
357 } else {
358 if (iNum > 0)
359 ++iNum;
360 continue;
361 }
362 } else {
363 if (iNum > 0)
364 ++iNum;
365 continue;
366 }
367 }
368
369 ASSERT(iClsCur <= FX_BIDICLASS_BN);
370 iAction = gc_FX_BidiWeakActions[iState][iClsCur];
371 iClsRun = GetDeferredType(iAction);
372 if (iClsRun != FX_BWAXX && iNum > 0) {
373 SetDeferredRun(chars, true, i, iNum, iClsRun);
374 iNum = 0;
375 }
376 iClsNew = GetResolvedType(iAction);
377 if (iClsNew != FX_BWAXX)
378 pTC->m_iBidiClass = static_cast<int16_t>(iClsNew);
379 if (FX_BWAIX & iAction)
380 ++iNum;
381
382 iState = gc_FX_BidiWeakStates[iState][iClsCur];
383 }
384 if (iNum == 0)
385 return;
386
387 iClsCur = Direction(0);
388 iClsRun = GetDeferredType(gc_FX_BidiWeakActions[iState][iClsCur]);
389 if (iClsRun != FX_BWAXX)
390 SetDeferredRun(chars, true, i, iNum, iClsRun);
391 }
392
ResolveNeutrals(std::vector<CFX_Char> * chars,size_t iCount)393 void ResolveNeutrals(std::vector<CFX_Char>* chars, size_t iCount) {
394 if (iCount <= 1)
395 return;
396 --iCount;
397
398 CFX_Char* pTC;
399 int32_t iLevel = 0;
400 int32_t iState = FX_BNSl;
401 size_t i = 0;
402 size_t iNum = 0;
403 int32_t iClsCur;
404 int32_t iClsRun;
405 int32_t iClsNew;
406 int32_t iAction;
407 for (; i <= iCount; ++i) {
408 pTC = &(*chars)[i];
409 iClsCur = pTC->m_iBidiClass;
410 if (iClsCur == FX_BIDICLASS_BN) {
411 if (iNum)
412 ++iNum;
413 continue;
414 }
415
416 ASSERT(iClsCur < FX_BIDICLASS_AL);
417 iAction = gc_FX_BidiNeutralActions[iState][iClsCur];
418 iClsRun = GetDeferredNeutrals(iAction, iLevel);
419 if (iClsRun != FX_BIDICLASS_N && iNum > 0) {
420 SetDeferredRun(chars, true, i, iNum, iClsRun);
421 iNum = 0;
422 }
423
424 iClsNew = GetResolvedNeutrals(iAction);
425 if (iClsNew != FX_BIDICLASS_N)
426 pTC->m_iBidiClass = (int16_t)iClsNew;
427 if (FX_BNAIn & iAction)
428 ++iNum;
429
430 iState = gc_FX_BidiNeutralStates[iState][iClsCur];
431 iLevel = pTC->m_iBidiLevel;
432 }
433 if (iNum == 0)
434 return;
435
436 iClsCur = Direction(iLevel);
437 iClsRun =
438 GetDeferredNeutrals(gc_FX_BidiNeutralActions[iState][iClsCur], iLevel);
439 if (iClsRun != FX_BIDICLASS_N)
440 SetDeferredRun(chars, true, i, iNum, iClsRun);
441 }
442
ResolveImplicit(std::vector<CFX_Char> * chars,size_t iCount)443 void ResolveImplicit(std::vector<CFX_Char>* chars, size_t iCount) {
444 for (size_t i = 0; i < iCount; ++i) {
445 int32_t iCls = (*chars)[i].m_iBidiClass;
446 if (iCls == FX_BIDICLASS_BN)
447 continue;
448
449 ASSERT(iCls > FX_BIDICLASS_ON && iCls < FX_BIDICLASS_AL);
450 int32_t iLevel = (*chars)[i].m_iBidiLevel;
451 iLevel += gc_FX_BidiAddLevel[FX_IsOdd(iLevel)][iCls - 1];
452 (*chars)[i].m_iBidiLevel = (int16_t)iLevel;
453 }
454 }
455
ResolveWhitespace(std::vector<CFX_Char> * chars,size_t iCount)456 void ResolveWhitespace(std::vector<CFX_Char>* chars, size_t iCount) {
457 if (iCount <= 1)
458 return;
459 iCount--;
460
461 int32_t iLevel = 0;
462 size_t i = 0;
463 size_t iNum = 0;
464 for (; i <= iCount; ++i) {
465 switch ((*chars)[i].m_iBidiClass) {
466 case FX_BIDICLASS_WS:
467 ++iNum;
468 break;
469 case FX_BIDICLASS_RLE:
470 case FX_BIDICLASS_LRE:
471 case FX_BIDICLASS_LRO:
472 case FX_BIDICLASS_RLO:
473 case FX_BIDICLASS_PDF:
474 case FX_BIDICLASS_BN:
475 (*chars)[i].m_iBidiLevel = static_cast<int16_t>(iLevel);
476 ++iNum;
477 break;
478 case FX_BIDICLASS_S:
479 case FX_BIDICLASS_B:
480 if (iNum > 0)
481 SetDeferredRun(chars, false, i, iNum, 0);
482
483 (*chars)[i].m_iBidiLevel = 0;
484 iNum = 0;
485 break;
486 default:
487 iNum = 0;
488 break;
489 }
490 iLevel = (*chars)[i].m_iBidiLevel;
491 }
492 if (iNum > 0)
493 SetDeferredRun(chars, false, i, iNum, 0);
494 }
495
ReorderLevel(std::vector<CFX_Char> * chars,size_t iCount,int32_t iBaseLevel,size_t iStart,bool bReverse)496 size_t ReorderLevel(std::vector<CFX_Char>* chars,
497 size_t iCount,
498 int32_t iBaseLevel,
499 size_t iStart,
500 bool bReverse) {
501 ASSERT(iBaseLevel >= 0 && iBaseLevel <= kBidiMaxLevel);
502 ASSERT(iStart < iCount);
503
504 if (iCount < 1)
505 return 0;
506
507 bReverse = bReverse || FX_IsOdd(iBaseLevel);
508 size_t i = iStart;
509 for (; i < iCount; ++i) {
510 int32_t iLevel = (*chars)[i].m_iBidiLevel;
511 if (iLevel == iBaseLevel)
512 continue;
513 if (iLevel < iBaseLevel)
514 break;
515
516 i += ReorderLevel(chars, iCount, iBaseLevel + 1, i, bReverse) - 1;
517 }
518
519 size_t iNum = i - iStart;
520 if (bReverse && iNum > 1)
521 ReverseString(chars, iStart, iNum);
522
523 return iNum;
524 }
525
Reorder(std::vector<CFX_Char> * chars,size_t iCount)526 void Reorder(std::vector<CFX_Char>* chars, size_t iCount) {
527 for (size_t i = 0; i < iCount;)
528 i += ReorderLevel(chars, iCount, 0, i, false);
529 }
530
Position(std::vector<CFX_Char> * chars,size_t iCount)531 void Position(std::vector<CFX_Char>* chars, size_t iCount) {
532 for (size_t i = 0; i < iCount; ++i)
533 (*chars)[(*chars)[i].m_iBidiPos].m_iBidiOrder = i;
534 }
535 };
536
537 #endif // PDF_ENABLE_XFA
538
539 } // namespace
540
CFX_BidiChar()541 CFX_BidiChar::CFX_BidiChar()
542 : m_CurrentSegment({0, 0, NEUTRAL}), m_LastSegment({0, 0, NEUTRAL}) {}
543
AppendChar(wchar_t wch)544 bool CFX_BidiChar::AppendChar(wchar_t wch) {
545 uint32_t dwProps = FX_GetUnicodeProperties(wch);
546 int32_t iBidiCls = (dwProps & FX_BIDICLASSBITSMASK) >> FX_BIDICLASSBITS;
547 Direction direction = NEUTRAL;
548 switch (iBidiCls) {
549 case FX_BIDICLASS_L:
550 case FX_BIDICLASS_AN:
551 case FX_BIDICLASS_EN:
552 direction = LEFT;
553 break;
554 case FX_BIDICLASS_R:
555 case FX_BIDICLASS_AL:
556 direction = RIGHT;
557 break;
558 }
559
560 bool bChangeDirection = (direction != m_CurrentSegment.direction);
561 if (bChangeDirection)
562 StartNewSegment(direction);
563
564 m_CurrentSegment.count++;
565 return bChangeDirection;
566 }
567
EndChar()568 bool CFX_BidiChar::EndChar() {
569 StartNewSegment(NEUTRAL);
570 return m_LastSegment.count > 0;
571 }
572
StartNewSegment(CFX_BidiChar::Direction direction)573 void CFX_BidiChar::StartNewSegment(CFX_BidiChar::Direction direction) {
574 m_LastSegment = m_CurrentSegment;
575 m_CurrentSegment.start += m_CurrentSegment.count;
576 m_CurrentSegment.count = 0;
577 m_CurrentSegment.direction = direction;
578 }
579
CFX_BidiString(const WideString & str)580 CFX_BidiString::CFX_BidiString(const WideString& str)
581 : m_Str(str),
582 m_pBidiChar(pdfium::MakeUnique<CFX_BidiChar>()),
583 m_eOverallDirection(CFX_BidiChar::LEFT) {
584 for (const auto& c : m_Str) {
585 if (m_pBidiChar->AppendChar(c))
586 m_Order.push_back(m_pBidiChar->GetSegmentInfo());
587 }
588 if (m_pBidiChar->EndChar())
589 m_Order.push_back(m_pBidiChar->GetSegmentInfo());
590
591 size_t nR2L = std::count_if(m_Order.begin(), m_Order.end(),
592 [](const CFX_BidiChar::Segment& seg) {
593 return seg.direction == CFX_BidiChar::RIGHT;
594 });
595
596 size_t nL2R = std::count_if(m_Order.begin(), m_Order.end(),
597 [](const CFX_BidiChar::Segment& seg) {
598 return seg.direction == CFX_BidiChar::LEFT;
599 });
600
601 if (nR2L > 0 && nR2L >= nL2R)
602 SetOverallDirectionRight();
603 }
604
~CFX_BidiString()605 CFX_BidiString::~CFX_BidiString() {}
606
SetOverallDirectionRight()607 void CFX_BidiString::SetOverallDirectionRight() {
608 if (m_eOverallDirection != CFX_BidiChar::RIGHT) {
609 std::reverse(m_Order.begin(), m_Order.end());
610 m_eOverallDirection = CFX_BidiChar::RIGHT;
611 }
612 }
613
614 #ifdef PDF_ENABLE_XFA
FX_BidiLine(std::vector<CFX_Char> * chars,size_t iCount)615 void FX_BidiLine(std::vector<CFX_Char>* chars, size_t iCount) {
616 CFX_BidiLine blt;
617 blt.BidiLine(chars, iCount);
618 }
619 #endif // PDF_ENABLE_XFA
620