1 // Copyright 2014 The PDFium Authors
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
6 // Original code is licensed as follows:
7 /*
8 * Copyright 2006 Jeremias Maerki in part, and ZXing Authors in part
9 *
10 * Licensed under the Apache License, Version 2.0 (the "License");
11 * you may not use this file except in compliance with the License.
12 * You may obtain a copy of the License at
13 *
14 * http://www.apache.org/licenses/LICENSE-2.0
15 *
16 * Unless required by applicable law or agreed to in writing, software
17 * distributed under the License is distributed on an "AS IS" BASIS,
18 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
19 * See the License for the specific language governing permissions and
20 * limitations under the License.
21 */
22
23 #include "fxbarcode/pdf417/BC_PDF417HighLevelEncoder.h"
24
25 #include <array>
26
27 #include "core/fxcrt/fx_extension.h"
28 #include "core/fxcrt/fx_string.h"
29 #include "third_party/bigint/BigIntegerLibrary.hh"
30
31 namespace {
32
// Mode-switch codewords emitted into the high-level encoding.
// Switches the symbol into text compaction.
constexpr int16_t kLatchToText{900};
// Switches into byte compaction; emitted ahead of bytes written one per
// codeword.
constexpr int16_t kLatchToBytePadded{901};
// Switches into numeric compaction.
constexpr int16_t kLatchToNumeric{902};
// Emitted while in text compaction ahead of a single byte value.
constexpr int16_t kShiftToByte{913};
// Switches into byte compaction; emitted ahead of groups of six bytes packed
// into five codewords.
constexpr int16_t kLatchToByte{924};
38
// Lookup table indexed by a 7-bit character code. Each entry is the value
// used for that character in the Mixed text sub-mode, or -1 when the
// character cannot be encoded in that sub-mode. Laid out 16 entries per row.
constexpr std::array<const int8_t, 128> kMixed = {{
    // 0x00 - 0x0F ('\t' and '\r' are encodable)
    -1, -1, -1, -1, -1, -1, -1, -1, -1, 12, -1, -1, -1, 11, -1, -1,
    // 0x10 - 0x1F
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    // 0x20 - 0x2F (space and punctuation)
    26, -1, -1, 15, 18, 21, 10, -1, -1, -1, 22, 20, 13, 16, 17, 19,
    // 0x30 - 0x3F (digits, ':' and '=')
    0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 14, -1, -1, 23, -1, -1,
    // 0x40 - 0x4F
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    // 0x50 - 0x5F ('^')
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 24, -1,
    // 0x60 - 0x6F
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    // 0x70 - 0x7F
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}};
47
// Lookup table indexed by a 7-bit character code. Each entry is the value
// used for that character in the Punctuation text sub-mode, or -1 when the
// character cannot be encoded in that sub-mode. Laid out 16 entries per row.
constexpr std::array<const int8_t, 128> kPunctuation = {{
    // 0x00 - 0x0F ('\t', '\n' and '\r' are encodable)
    -1, -1, -1, -1, -1, -1, -1, -1, -1, 12, 15, -1, -1, 11, -1, -1,
    // 0x10 - 0x1F
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    // 0x20 - 0x2F
    -1, 10, 20, -1, 18, -1, -1, 28, 23, 24, 22, -1, 13, 16, 17, 19,
    // 0x30 - 0x3F
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 14, 0, 1, -1, 2, 25,
    // 0x40 - 0x4F ('@')
    3, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    // 0x50 - 0x5F
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 4, 5, 6, -1, 7,
    // 0x60 - 0x6F ('`')
    8, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    // 0x70 - 0x7F
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 26, 21, 27, 9, -1}};
56
IsAlphaUpperOrSpace(wchar_t ch)57 bool IsAlphaUpperOrSpace(wchar_t ch) {
58 return ch == ' ' || FXSYS_IsUpperASCII(ch);
59 }
60
IsAlphaLowerOrSpace(wchar_t ch)61 bool IsAlphaLowerOrSpace(wchar_t ch) {
62 return ch == ' ' || FXSYS_IsLowerASCII(ch);
63 }
64
IsMixed(wchar_t ch)65 bool IsMixed(wchar_t ch) {
66 // Bounds check avoiding sign mismatch error given questionable signedness.
67 return !((ch & ~0x7F) || kMixed[ch] == -1);
68 }
69
IsPunctuation(wchar_t ch)70 bool IsPunctuation(wchar_t ch) {
71 // Bounds check avoiding sign mismatch error given questionable signedness.
72 return !((ch & ~0x7F) || kPunctuation[ch] == -1);
73 }
74
// Returns true for characters treated as text: codes 32 through 126
// inclusive, plus tab, line feed and carriage return.
bool IsText(wchar_t ch) {
  switch (ch) {
    case '\t':
    case '\n':
    case '\r':
      return true;
    default:
      return ch >= 32 && ch <= 126;
  }
}
78
79 } // namespace
80
81 // static
EncodeHighLevel(WideStringView msg)82 std::optional<WideString> CBC_PDF417HighLevelEncoder::EncodeHighLevel(
83 WideStringView msg) {
84 const ByteString bytes = FX_UTF8Encode(msg);
85 size_t len = bytes.GetLength();
86 WideString result;
87 result.Reserve(len);
88 for (size_t i = 0; i < len; i++) {
89 wchar_t ch = bytes[i] & 0xff;
90 if (ch == '?' && bytes[i] != '?')
91 return std::nullopt;
92
93 result += ch;
94 }
95 len = result.GetLength();
96 WideString sb;
97 sb.Reserve(len);
98 size_t p = 0;
99 SubMode textSubMode = SubMode::kAlpha;
100 EncodingMode encodingMode = EncodingMode::kUnknown;
101 while (p < len) {
102 size_t n = DetermineConsecutiveDigitCount(result, p);
103 if (n >= 13) {
104 sb += kLatchToNumeric;
105 encodingMode = EncodingMode::kNumeric;
106 textSubMode = SubMode::kAlpha;
107 EncodeNumeric(result, p, n, &sb);
108 p += n;
109 } else {
110 size_t t = DetermineConsecutiveTextCount(result, p);
111 if (t >= 5 || n == len) {
112 if (encodingMode != EncodingMode::kText) {
113 sb += kLatchToText;
114 encodingMode = EncodingMode::kText;
115 textSubMode = SubMode::kAlpha;
116 }
117 textSubMode = EncodeText(result, p, t, textSubMode, &sb);
118 p += t;
119 } else {
120 std::optional<size_t> b =
121 DetermineConsecutiveBinaryCount(result, bytes.unsigned_span(), p);
122 if (!b.has_value())
123 return std::nullopt;
124
125 size_t b_value = b.value();
126 if (b_value == 0)
127 b_value = 1;
128 if (b_value == 1 && encodingMode == EncodingMode::kText) {
129 EncodeBinary(bytes.unsigned_span(), p, 1, EncodingMode::kText, &sb);
130 } else {
131 EncodeBinary(bytes.unsigned_span(), p, b_value, encodingMode, &sb);
132 encodingMode = EncodingMode::kByte;
133 textSubMode = SubMode::kAlpha;
134 }
135 p += b_value;
136 }
137 }
138 }
139 return sb;
140 }
141
EncodeText(const WideString & msg,size_t startpos,size_t count,SubMode initialSubmode,WideString * sb)142 CBC_PDF417HighLevelEncoder::SubMode CBC_PDF417HighLevelEncoder::EncodeText(
143 const WideString& msg,
144 size_t startpos,
145 size_t count,
146 SubMode initialSubmode,
147 WideString* sb) {
148 WideString tmp;
149 tmp.Reserve(count);
150 SubMode submode = initialSubmode;
151 size_t idx = 0;
152 while (idx < count) {
153 wchar_t ch = msg[startpos + idx];
154 switch (submode) {
155 case SubMode::kAlpha:
156 if (IsAlphaUpperOrSpace(ch)) {
157 if (ch == ' ')
158 tmp += 26;
159 else
160 tmp += ch - 65;
161 break;
162 }
163 if (IsAlphaLowerOrSpace(ch)) {
164 submode = SubMode::kLower;
165 tmp += 27;
166 continue;
167 }
168 if (IsMixed(ch)) {
169 submode = SubMode::kMixed;
170 tmp += 28;
171 continue;
172 }
173 if (IsPunctuation(ch)) {
174 tmp += 29;
175 tmp += kPunctuation[ch];
176 }
177 break;
178 case SubMode::kLower:
179 if (IsAlphaLowerOrSpace(ch)) {
180 if (ch == ' ')
181 tmp += 26;
182 else
183 tmp += ch - 97;
184 break;
185 }
186 if (IsAlphaUpperOrSpace(ch)) {
187 tmp += 27;
188 tmp += ch - 65;
189 break;
190 }
191 if (IsMixed(ch)) {
192 submode = SubMode::kMixed;
193 tmp += 28;
194 continue;
195 }
196 if (IsPunctuation(ch)) {
197 tmp += 29;
198 tmp += kPunctuation[ch];
199 }
200 break;
201 case SubMode::kMixed:
202 if (IsMixed(ch)) {
203 tmp += kMixed[ch];
204 break;
205 }
206 if (IsAlphaUpperOrSpace(ch)) {
207 submode = SubMode::kAlpha;
208 tmp += 28;
209 continue;
210 }
211 if (IsAlphaLowerOrSpace(ch)) {
212 submode = SubMode::kLower;
213 tmp += 27;
214 continue;
215 }
216 if (startpos + idx + 1 < count) {
217 wchar_t next = msg[startpos + idx + 1];
218 if (IsPunctuation(next)) {
219 submode = SubMode::kPunctuation;
220 tmp += 25;
221 continue;
222 }
223 }
224 if (IsPunctuation(ch)) {
225 tmp += 29;
226 tmp += kPunctuation[ch];
227 }
228 break;
229 default:
230 if (IsPunctuation(ch)) {
231 tmp += kPunctuation[ch];
232 break;
233 }
234 submode = SubMode::kAlpha;
235 tmp += 29;
236 continue;
237 }
238 ++idx;
239 }
240 wchar_t h = 0;
241 size_t len = tmp.GetLength();
242 for (size_t i = 0; i < len; i++) {
243 bool odd = (i % 2) != 0;
244 if (odd) {
245 h = (h * 30) + tmp[i];
246 *sb += h;
247 } else {
248 h = tmp[i];
249 }
250 }
251 if ((len % 2) != 0)
252 *sb += (h * 30) + 29;
253 return submode;
254 }
255
EncodeBinary(pdfium::span<const uint8_t> bytes,size_t startpos,size_t count,EncodingMode startmode,WideString * sb)256 void CBC_PDF417HighLevelEncoder::EncodeBinary(pdfium::span<const uint8_t> bytes,
257 size_t startpos,
258 size_t count,
259 EncodingMode startmode,
260 WideString* sb) {
261 if (count == 1 && startmode == EncodingMode::kText)
262 *sb += kShiftToByte;
263
264 size_t idx = startpos;
265 if (count >= 6) {
266 *sb += kLatchToByte;
267 std::array<wchar_t, 5> chars;
268 while ((startpos + count - idx) >= 6) {
269 int64_t t = 0;
270 for (size_t i = 0; i < 6; i++) {
271 t <<= 8;
272 t += bytes[idx + i] & 0xff;
273 }
274 for (size_t i = 0; i < 5; i++) {
275 chars[i] = (t % 900);
276 t /= 900;
277 }
278 for (size_t i = 5; i >= 1; i--)
279 *sb += (chars[i - 1]);
280 idx += 6;
281 }
282 }
283 if (idx < startpos + count)
284 *sb += kLatchToBytePadded;
285 for (size_t i = idx; i < startpos + count; i++) {
286 int32_t ch = bytes[i] & 0xff;
287 *sb += ch;
288 }
289 }
290
EncodeNumeric(const WideString & msg,size_t startpos,size_t count,WideString * sb)291 void CBC_PDF417HighLevelEncoder::EncodeNumeric(const WideString& msg,
292 size_t startpos,
293 size_t count,
294 WideString* sb) {
295 size_t idx = 0;
296 BigInteger num900 = 900;
297 while (idx < count) {
298 WideString tmp;
299 size_t len = 44 < count - idx ? 44 : count - idx;
300 ByteString part = (L'1' + msg.Substr(startpos + idx, len)).ToUTF8();
301 BigInteger bigint = stringToBigInteger(part.c_str());
302 do {
303 int32_t c = (bigint % num900).toInt();
304 tmp += c;
305 bigint = bigint / num900;
306 } while (!bigint.isZero());
307 for (size_t i = tmp.GetLength(); i >= 1; i--)
308 *sb += tmp[i - 1];
309 idx += len;
310 }
311 }
312
DetermineConsecutiveDigitCount(WideString msg,size_t startpos)313 size_t CBC_PDF417HighLevelEncoder::DetermineConsecutiveDigitCount(
314 WideString msg,
315 size_t startpos) {
316 size_t count = 0;
317 size_t len = msg.GetLength();
318 size_t idx = startpos;
319 if (idx < len) {
320 wchar_t ch = msg[idx];
321 while (FXSYS_IsDecimalDigit(ch) && idx < len) {
322 count++;
323 idx++;
324 if (idx < len)
325 ch = msg[idx];
326 }
327 }
328 return count;
329 }
330
DetermineConsecutiveTextCount(WideString msg,size_t startpos)331 size_t CBC_PDF417HighLevelEncoder::DetermineConsecutiveTextCount(
332 WideString msg,
333 size_t startpos) {
334 size_t len = msg.GetLength();
335 size_t idx = startpos;
336 while (idx < len) {
337 wchar_t ch = msg[idx];
338 size_t numericCount = 0;
339 while (numericCount < 13 && FXSYS_IsDecimalDigit(ch) && idx < len) {
340 numericCount++;
341 idx++;
342 if (idx < len)
343 ch = msg[idx];
344 }
345 if (numericCount >= 13)
346 return idx - startpos - numericCount;
347 if (numericCount > 0)
348 continue;
349 ch = msg[idx];
350 if (!IsText(ch))
351 break;
352 idx++;
353 }
354 return idx - startpos;
355 }
356
357 std::optional<size_t>
DetermineConsecutiveBinaryCount(WideString msg,pdfium::span<const uint8_t> bytes,size_t startpos)358 CBC_PDF417HighLevelEncoder::DetermineConsecutiveBinaryCount(
359 WideString msg,
360 pdfium::span<const uint8_t> bytes,
361 size_t startpos) {
362 size_t len = msg.GetLength();
363 size_t idx = startpos;
364 while (idx < len) {
365 wchar_t ch = msg[idx];
366 size_t numericCount = 0;
367 while (numericCount < 13 && FXSYS_IsDecimalDigit(ch)) {
368 numericCount++;
369 size_t i = idx + numericCount;
370 if (i >= len)
371 break;
372 ch = msg[i];
373 }
374 if (numericCount >= 13)
375 return idx - startpos;
376
377 size_t textCount = 0;
378 while (textCount < 5 && IsText(ch)) {
379 textCount++;
380 size_t i = idx + textCount;
381 if (i >= len)
382 break;
383 ch = msg[i];
384 }
385 if (textCount >= 5)
386 return idx - startpos;
387 ch = msg[idx];
388 if (bytes[idx] == 63 && ch != '?')
389 return std::nullopt;
390 idx++;
391 }
392 return idx - startpos;
393 }
394