• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright 2014 The PDFium Authors
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
6 
7 #include "xfa/fgas/layout/fgas_arabic.h"
8 
9 #include <array>
10 #include <iterator>
11 
12 #include "core/fxcrt/fx_unicode.h"
13 #include "core/fxcrt/stl_util.h"
14 #include "xfa/fgas/layout/cfgas_char.h"
15 
16 namespace {
17 
// One row of the contextual-form table: the four code points that a single
// Arabic character may be replaced with, selected by GetArabicFormChar()
// according to the types of the neighbouring characters.
struct FX_ARBFORMTABLE {
  // Used when neither neighbour joins.
  uint16_t wIsolated;
  // Used when only the previous character joins.
  uint16_t wFinal;
  // Used when only the next character joins.
  uint16_t wInitial;
  // Used when both neighbours join.
  uint16_t wMedial;
};
24 
// One row of the alef lookup table used by GetArabicFromAlefTable().
struct FX_ARAALEF {
  // Lookup key: the alef code point being searched for.
  uint16_t wAlef;
  // Result: the code point returned when `wAlef` matches.
  uint16_t wIsolated;
};
29 
30 constexpr auto kFormTable = fxcrt::ToArray<const FX_ARBFORMTABLE>({
31     {0xFE81, 0xFE82, 0xFE81, 0xFE82}, {0xFE83, 0xFE84, 0xFE83, 0xFE84},
32     {0xFE85, 0xFE86, 0xFE85, 0xFE86}, {0xFE87, 0xFE88, 0xFE87, 0xFE88},
33     {0xFE89, 0xFE8A, 0xFE8B, 0xFE8C}, {0xFE8D, 0xFE8E, 0xFE8D, 0xFE8E},
34     {0xFE8F, 0xFE90, 0xFE91, 0xFE92}, {0xFE93, 0xFE94, 0xFE93, 0xFE94},
35     {0xFE95, 0xFE96, 0xFE97, 0xFE98}, {0xFE99, 0xFE9A, 0xFE9B, 0xFE9C},
36     {0xFE9D, 0xFE9E, 0xFE9F, 0xFEA0}, {0xFEA1, 0xFEA2, 0xFEA3, 0xFEA4},
37     {0xFEA5, 0xFEA6, 0xFEA7, 0xFEA8}, {0xFEA9, 0xFEAA, 0xFEA9, 0xFEAA},
38     {0xFEAB, 0xFEAC, 0xFEAB, 0xFEAC}, {0xFEAD, 0xFEAE, 0xFEAD, 0xFEAE},
39     {0xFEAF, 0xFEB0, 0xFEAF, 0xFEB0}, {0xFEB1, 0xFEB2, 0xFEB3, 0xFEB4},
40     {0xFEB5, 0xFEB6, 0xFEB7, 0xFEB8}, {0xFEB9, 0xFEBA, 0xFEBB, 0xFEBC},
41     {0xFEBD, 0xFEBE, 0xFEBF, 0xFEC0}, {0xFEC1, 0xFEC2, 0xFEC3, 0xFEC4},
42     {0xFEC5, 0xFEC6, 0xFEC7, 0xFEC8}, {0xFEC9, 0xFECA, 0xFECB, 0xFECC},
43     {0xFECD, 0xFECE, 0xFECF, 0xFED0}, {0x063B, 0x063B, 0x063B, 0x063B},
44     {0x063C, 0x063C, 0x063C, 0x063C}, {0x063D, 0x063D, 0x063D, 0x063D},
45     {0x063E, 0x063E, 0x063E, 0x063E}, {0x063F, 0x063F, 0x063F, 0x063F},
46     {0x0640, 0x0640, 0x0640, 0x0640}, {0xFED1, 0xFED2, 0xFED3, 0xFED4},
47     {0xFED5, 0xFED6, 0xFED7, 0xFED8}, {0xFED9, 0xFEDA, 0xFEDB, 0xFEDC},
48     {0xFEDD, 0xFEDE, 0xFEDF, 0xFEE0}, {0xFEE1, 0xFEE2, 0xFEE3, 0xFEE4},
49     {0xFEE5, 0xFEE6, 0xFEE7, 0xFEE8}, {0xFEE9, 0xFEEA, 0xFEEB, 0xFEEC},
50     {0xFEED, 0xFEEE, 0xFEED, 0xFEEE}, {0xFEEF, 0xFEF0, 0xFBFE, 0xFBFF},
51     {0xFEF1, 0xFEF2, 0xFEF3, 0xFEF4}, {0x064B, 0x064B, 0x064B, 0x064B},
52     {0x064C, 0x064C, 0x064C, 0x064C}, {0x064D, 0x064D, 0x064D, 0x064D},
53     {0x064E, 0x064E, 0x064E, 0x064E}, {0x064F, 0x064F, 0x064F, 0x064F},
54     {0x0650, 0x0650, 0x0650, 0x0650}, {0x0651, 0x0651, 0x0651, 0x0651},
55     {0x0652, 0x0652, 0x0652, 0x0652}, {0x0653, 0x0653, 0x0653, 0x0653},
56     {0x0654, 0x0654, 0x0654, 0x0654}, {0x0655, 0x0655, 0x0655, 0x0655},
57     {0x0656, 0x0656, 0x0656, 0x0656}, {0x0657, 0x0657, 0x0657, 0x0657},
58     {0x0658, 0x0658, 0x0658, 0x0658}, {0x0659, 0x0659, 0x0659, 0x0659},
59     {0x065A, 0x065A, 0x065A, 0x065A}, {0x065B, 0x065B, 0x065B, 0x065B},
60     {0x065C, 0x065C, 0x065C, 0x065C}, {0x065D, 0x065D, 0x065D, 0x065D},
61     {0x065E, 0x065E, 0x065E, 0x065E}, {0x065F, 0x065F, 0x065F, 0x065F},
62     {0x0660, 0x0660, 0x0660, 0x0660}, {0x0661, 0x0661, 0x0661, 0x0661},
63     {0x0662, 0x0662, 0x0662, 0x0662}, {0x0663, 0x0663, 0x0663, 0x0663},
64     {0x0664, 0x0664, 0x0664, 0x0664}, {0x0665, 0x0665, 0x0665, 0x0665},
65     {0x0666, 0x0666, 0x0666, 0x0666}, {0x0667, 0x0667, 0x0667, 0x0667},
66     {0x0668, 0x0668, 0x0668, 0x0668}, {0x0669, 0x0669, 0x0669, 0x0669},
67     {0x066A, 0x066A, 0x066A, 0x066A}, {0x066B, 0x066B, 0x066B, 0x066B},
68     {0x066C, 0x066C, 0x066C, 0x066C}, {0x066D, 0x066D, 0x066D, 0x066D},
69     {0x066E, 0x066E, 0x066E, 0x066E}, {0x066F, 0x066F, 0x066F, 0x066F},
70     {0x0670, 0x0670, 0x0670, 0x0670}, {0xFB50, 0xFB51, 0xFB50, 0xFB51},
71     {0x0672, 0x0672, 0x0672, 0x0672}, {0x0673, 0x0673, 0x0673, 0x0673},
72     {0x0674, 0x0674, 0x0674, 0x0674}, {0x0675, 0x0675, 0x0675, 0x0675},
73     {0x0676, 0x0676, 0x0676, 0x0676}, {0x0677, 0x0677, 0x0677, 0x0677},
74     {0x0678, 0x0678, 0x0678, 0x0678}, {0xFB66, 0xFB67, 0xFB68, 0xFB69},
75     {0xFB5E, 0xFB5F, 0xFB60, 0xFB61}, {0xFB52, 0xFB53, 0xFB54, 0xFB55},
76     {0x067C, 0x067C, 0x067C, 0x067C}, {0x067D, 0x067D, 0x067D, 0x067D},
77     {0xFB56, 0xFB57, 0xFB58, 0xFB59}, {0xFB62, 0xFB63, 0xFB64, 0xFB65},
78     {0xFB5A, 0xFB5B, 0xFB5C, 0xFB5D}, {0x0681, 0x0681, 0x0681, 0x0681},
79     {0x0682, 0x0682, 0x0682, 0x0682}, {0xFB76, 0xFB77, 0xFB78, 0xFB79},
80     {0xFB72, 0xFB73, 0xFB74, 0xFB75}, {0x0685, 0x0685, 0x0685, 0x0685},
81     {0xFB7A, 0xFB7B, 0xFB7C, 0xFB7D}, {0xFB7E, 0xFB7F, 0xFB80, 0xFB81},
82     {0xFB88, 0xFB89, 0xFB88, 0xFB89}, {0x0689, 0x0689, 0x0689, 0x0689},
83     {0x068A, 0x068A, 0x068A, 0x068A}, {0x068B, 0x068B, 0x068B, 0x068B},
84     {0xFB84, 0xFB85, 0xFB84, 0xFB85}, {0xFB82, 0xFB83, 0xFB82, 0xFB83},
85     {0xFB86, 0xFB87, 0xFB86, 0xFB87}, {0x068F, 0x068F, 0x068F, 0x068F},
86     {0x0690, 0x0690, 0x0690, 0x0690}, {0xFB8C, 0xFB8D, 0xFB8C, 0xFB8D},
87     {0x0692, 0x0692, 0x0692, 0x0692}, {0x0693, 0x0693, 0x0693, 0x0693},
88     {0x0694, 0x0694, 0x0694, 0x0694}, {0x0695, 0x0695, 0x0695, 0x0695},
89     {0x0696, 0x0696, 0x0696, 0x0696}, {0x0697, 0x0697, 0x0697, 0x0697},
90     {0xFB8A, 0xFB8B, 0xFB8A, 0xFB8B}, {0x0699, 0x0699, 0x0699, 0x0699},
91     {0x069A, 0x069A, 0x069A, 0x069A}, {0x069B, 0x069B, 0x069B, 0x069B},
92     {0x069C, 0x069C, 0x069C, 0x069C}, {0x069D, 0x069D, 0x069D, 0x069D},
93     {0x069E, 0x069E, 0x069E, 0x069E}, {0x069F, 0x069F, 0x069F, 0x069F},
94     {0x06A0, 0x06A0, 0x06A0, 0x06A0}, {0x06A1, 0x06A1, 0x06A1, 0x06A1},
95     {0x06A2, 0x06A2, 0x06A2, 0x06A2}, {0x06A3, 0x06A3, 0x06A3, 0x06A3},
96     {0xFB6A, 0xFB6B, 0xFB6C, 0xFB6D}, {0x06A5, 0x06A5, 0x06A5, 0x06A5},
97     {0xFB6E, 0xFB6F, 0xFB70, 0xFB71}, {0x06A7, 0x06A7, 0x06A7, 0x06A7},
98     {0x06A8, 0x06A8, 0x06A8, 0x06A8}, {0xFB8E, 0xFB8F, 0xFB90, 0xFB91},
99     {0x06AA, 0x06AA, 0x06AA, 0x06AA}, {0x06AB, 0x06AB, 0x06AB, 0x06AB},
100     {0x06AC, 0x06AC, 0x06AC, 0x06AC}, {0xFBD3, 0xFBD4, 0xFBD5, 0xFBD6},
101     {0x06AE, 0x06AE, 0x06AE, 0x06AE}, {0xFB92, 0xFB93, 0xFB94, 0xFB95},
102     {0x06B0, 0x06B0, 0x06B0, 0x06B0}, {0xFB9A, 0xFB9B, 0xFB9C, 0xFB9D},
103     {0x06B2, 0x06B2, 0x06B2, 0x06B2}, {0xFB96, 0xFB97, 0xFB98, 0xFB99},
104     {0x06B4, 0x06B4, 0x06B4, 0x06B4}, {0x06B5, 0x06B5, 0x06B5, 0x06B5},
105     {0x06B6, 0x06B6, 0x06B6, 0x06B6}, {0x06B7, 0x06B7, 0x06B7, 0x06B7},
106     {0x06B8, 0x06B8, 0x06B8, 0x06B8}, {0x06B9, 0x06B9, 0x06B9, 0x06B9},
107     {0xFB9E, 0xFB9F, 0xFBE8, 0xFBE9}, {0xFBA0, 0xFBA1, 0xFBA2, 0xFBA3},
108     {0x06BC, 0x06BC, 0x06BC, 0x06BC}, {0x06BD, 0x06BD, 0x06BD, 0x06BD},
109     {0xFBAA, 0xFBAB, 0xFBAC, 0xFBAD}, {0x06BF, 0x06BF, 0x06BF, 0x06BF},
110     {0xFBA4, 0xFBA5, 0xFBA4, 0xFBA5}, {0xFBA6, 0xFBA7, 0xFBA8, 0xFBA9},
111     {0x06C2, 0x06C2, 0x06C2, 0x06C2}, {0x06C3, 0x06C3, 0x06C3, 0x06C3},
112     {0x06C4, 0x06C4, 0x06C4, 0x06C4}, {0xFBE0, 0xFBE1, 0xFBE0, 0xFBE1},
113     {0xFBD9, 0xFBDA, 0xFBD9, 0xFBDA}, {0xFBD7, 0xFBD8, 0xFBD7, 0xFBD8},
114     {0xFBDB, 0xFBDC, 0xFBDB, 0xFBDC}, {0xFBE2, 0xFBE3, 0xFBE2, 0xFBE3},
115     {0x06CA, 0x06CA, 0x06CA, 0x06CA}, {0xFBDE, 0xFBDF, 0xFBDE, 0xFBDF},
116     {0xFBFC, 0xFBFD, 0xFBFE, 0xFBFF}, {0x06CD, 0x06CD, 0x06CD, 0x06CD},
117     {0x06CE, 0x06CE, 0x06CE, 0x06CE}, {0x06CF, 0x06CF, 0x06CF, 0x06CF},
118     {0xFBE4, 0xFBE5, 0xFBE6, 0xFBE7}, {0x06D1, 0x06D1, 0x06D1, 0x06D1},
119     {0xFBAE, 0xFBAF, 0xFBAE, 0xFBAF}, {0xFBB0, 0xFBB1, 0xFBB0, 0xFBB1},
120     {0x06D4, 0x06D4, 0x06D4, 0x06D4}, {0x06D5, 0x06D5, 0x06D5, 0x06D5},
121 });
122 constexpr uint16_t kFirstFormTableEntry = 0x0622;
123 constexpr uint16_t kLastFormTableEntry =
124     kFirstFormTableEntry + std::size(kFormTable) - 1;
125 
126 constexpr FX_ARAALEF kAlefTable[] = {
127     {0x0622, 0xFEF5},
128     {0x0623, 0xFEF7},
129     {0x0625, 0xFEF9},
130     {0x0627, 0xFEFB},
131 };
132 
133 constexpr auto kShaddaTable = fxcrt::ToArray<const uint16_t>({
134     0xFC5E,
135     0xFC5F,
136     0xFC60,
137     0xFC61,
138     0xFC62,
139 });
140 constexpr uint16_t kFirstShaddaTableEntry = 0x064c;
141 constexpr uint16_t kLastShaddaTableEntry =
142     kFirstShaddaTableEntry + std::size(kShaddaTable) - 1;
143 
GetArabicFormTable(wchar_t unicode)144 const FX_ARBFORMTABLE* GetArabicFormTable(wchar_t unicode) {
145   if (unicode < kFirstFormTableEntry || unicode > kLastFormTableEntry)
146     return nullptr;
147 
148   return &kFormTable[unicode - kFirstFormTableEntry];
149 }
150 
ParseChar(const CFGAS_Char * pTC,wchar_t * wChar,FX_CHARTYPE * eType)151 const FX_ARBFORMTABLE* ParseChar(const CFGAS_Char* pTC,
152                                  wchar_t* wChar,
153                                  FX_CHARTYPE* eType) {
154   if (!pTC) {
155     *eType = FX_CHARTYPE::kUnknown;
156     *wChar = pdfium::unicode::kZeroWidthNoBreakSpace;
157     return nullptr;
158   }
159 
160   *eType = pTC->GetCharType();
161   *wChar = static_cast<wchar_t>(pTC->char_code());
162   const FX_ARBFORMTABLE* pFT = GetArabicFormTable(*wChar);
163   if (!pFT || *eType >= FX_CHARTYPE::kArabicNormal)
164     *eType = FX_CHARTYPE::kUnknown;
165 
166   return pFT;
167 }
168 
GetArabicFromAlefTable(wchar_t alef)169 wchar_t GetArabicFromAlefTable(wchar_t alef) {
170   for (const FX_ARAALEF& v : kAlefTable) {
171     if (v.wAlef == alef)
172       return v.wIsolated;
173   }
174   return alef;
175 }
176 
177 }  // namespace
178 
179 namespace pdfium {
180 
GetArabicFormChar(wchar_t wch,wchar_t prev,wchar_t next)181 wchar_t GetArabicFormChar(wchar_t wch, wchar_t prev, wchar_t next) {
182   CFGAS_Char c(wch);
183   CFGAS_Char p(prev);
184   CFGAS_Char n(next);
185   return GetArabicFormChar(&c, &p, &n);
186 }
187 
GetArabicFormChar(const CFGAS_Char * cur,const CFGAS_Char * prev,const CFGAS_Char * next)188 wchar_t GetArabicFormChar(const CFGAS_Char* cur,
189                           const CFGAS_Char* prev,
190                           const CFGAS_Char* next) {
191   FX_CHARTYPE eCur;
192   wchar_t wCur;
193   const FX_ARBFORMTABLE* ft = ParseChar(cur, &wCur, &eCur);
194   if (eCur < FX_CHARTYPE::kArabicAlef || eCur >= FX_CHARTYPE::kArabicNormal)
195     return wCur;
196 
197   FX_CHARTYPE ePrev;
198   wchar_t wPrev;
199   ParseChar(prev, &wPrev, &ePrev);
200   if (wPrev == kArabicLetterLam && eCur == FX_CHARTYPE::kArabicAlef)
201     return pdfium::unicode::kZeroWidthNoBreakSpace;
202 
203   FX_CHARTYPE eNext;
204   wchar_t wNext;
205   ParseChar(next, &wNext, &eNext);
206   bool bAlef = (eNext == FX_CHARTYPE::kArabicAlef && wCur == kArabicLetterLam);
207   if (ePrev < FX_CHARTYPE::kArabicAlef) {
208     if (bAlef)
209       return GetArabicFromAlefTable(wNext);
210     return (eNext < FX_CHARTYPE::kArabicAlef) ? ft->wIsolated : ft->wInitial;
211   }
212 
213   if (bAlef) {
214     wCur = GetArabicFromAlefTable(wNext);
215     return (ePrev != FX_CHARTYPE::kArabicDistortion) ? wCur : ++wCur;
216   }
217 
218   if (ePrev == FX_CHARTYPE::kArabicAlef || ePrev == FX_CHARTYPE::kArabicSpecial)
219     return (eNext < FX_CHARTYPE::kArabicAlef) ? ft->wIsolated : ft->wInitial;
220   return (eNext < FX_CHARTYPE::kArabicAlef) ? ft->wFinal : ft->wMedial;
221 }
222 
GetArabicFromShaddaTable(wchar_t shadda)223 std::optional<wchar_t> GetArabicFromShaddaTable(wchar_t shadda) {
224   if (shadda < kFirstShaddaTableEntry || shadda > kLastShaddaTableEntry)
225     return std::nullopt;
226 
227   return kShaddaTable[shadda - kFirstShaddaTableEntry];
228 }
229 
230 }  // namespace pdfium
231