• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright 2014 The PDFium Authors
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
6 // Original code is licensed as follows:
7 /*
8  * Copyright 2006 Jeremias Maerki in part, and ZXing Authors in part
9  *
10  * Licensed under the Apache License, Version 2.0 (the "License");
11  * you may not use this file except in compliance with the License.
12  * You may obtain a copy of the License at
13  *
14  * http://www.apache.org/licenses/LICENSE-2.0
15  *
16  * Unless required by applicable law or agreed to in writing, software
17  * distributed under the License is distributed on an "AS IS" BASIS,
18  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
19  * See the License for the specific language governing permissions and
20  * limitations under the License.
21  */
22 
23 #include "fxbarcode/pdf417/BC_PDF417HighLevelEncoder.h"
24 
25 #include <array>
26 
27 #include "core/fxcrt/fx_extension.h"
28 #include "core/fxcrt/fx_string.h"
29 #include "third_party/bigint/BigIntegerLibrary.hh"
30 
31 namespace {
32 
// Mode-switch codewords appended to the output by EncodeHighLevel() and
// EncodeBinary().

// Emitted by EncodeHighLevel() before a run handled by EncodeText() when the
// current encoding mode is not already text.
constexpr int16_t kLatchToText{900};

// Emitted by EncodeBinary() before bytes that are written one per codeword.
constexpr int16_t kLatchToBytePadded{901};

// Emitted by EncodeHighLevel() before a run handled by EncodeNumeric().
constexpr int16_t kLatchToNumeric{902};

// Emitted by EncodeBinary() when a single byte is encoded while in text mode.
constexpr int16_t kShiftToByte{913};

// Emitted by EncodeBinary() before groups of six bytes packed into five
// codewords.
constexpr int16_t kLatchToByte{924};
38 
// Lookup table indexed by a 7-bit character code. Each entry is the value
// EncodeText() emits for that character while in SubMode::kMixed, or -1 when
// the character has no value in that submode. Laid out 16 entries per row.
constexpr std::array<const int8_t, 128> kMixed = {{
    // 0x00 - 0x0F
    -1, -1, -1, -1, -1, -1, -1, -1, -1, 12, -1, -1, -1, 11, -1, -1,
    // 0x10 - 0x1F
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    // 0x20 - 0x2F
    26, -1, -1, 15, 18, 21, 10, -1, -1, -1, 22, 20, 13, 16, 17, 19,
    // 0x30 - 0x3F
    0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 14, -1, -1, 23, -1, -1,
    // 0x40 - 0x4F
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    // 0x50 - 0x5F
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 24, -1,
    // 0x60 - 0x6F
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    // 0x70 - 0x7F
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
}};
47 
// Lookup table indexed by a 7-bit character code. Each entry is the value
// EncodeText() emits for that character as punctuation, or -1 when the
// character has no punctuation value. Laid out 16 entries per row.
constexpr std::array<const int8_t, 128> kPunctuation = {{
    // 0x00 - 0x0F
    -1, -1, -1, -1, -1, -1, -1, -1, -1, 12, 15, -1, -1, 11, -1, -1,
    // 0x10 - 0x1F
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    // 0x20 - 0x2F
    -1, 10, 20, -1, 18, -1, -1, 28, 23, 24, 22, -1, 13, 16, 17, 19,
    // 0x30 - 0x3F
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 14, 0, 1, -1, 2, 25,
    // 0x40 - 0x4F
    3, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    // 0x50 - 0x5F
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 4, 5, 6, -1, 7,
    // 0x60 - 0x6F
    8, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    // 0x70 - 0x7F
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 26, 21, 27, 9, -1,
}};
56 
IsAlphaUpperOrSpace(wchar_t ch)57 bool IsAlphaUpperOrSpace(wchar_t ch) {
58   return ch == ' ' || FXSYS_IsUpperASCII(ch);
59 }
60 
IsAlphaLowerOrSpace(wchar_t ch)61 bool IsAlphaLowerOrSpace(wchar_t ch) {
62   return ch == ' ' || FXSYS_IsLowerASCII(ch);
63 }
64 
IsMixed(wchar_t ch)65 bool IsMixed(wchar_t ch) {
66   // Bounds check avoiding sign mismatch error given questionable signedness.
67   return !((ch & ~0x7F) || kMixed[ch] == -1);
68 }
69 
IsPunctuation(wchar_t ch)70 bool IsPunctuation(wchar_t ch) {
71   // Bounds check avoiding sign mismatch error given questionable signedness.
72   return !((ch & ~0x7F) || kPunctuation[ch] == -1);
73 }
74 
// Returns true if |ch| is a tab, line feed, carriage return, or lies in the
// range 32..126 inclusive.
bool IsText(wchar_t ch) {
  switch (ch) {
    case '\t':
    case '\n':
    case '\r':
      return true;
    default:
      return ch >= 32 && ch <= 126;
  }
}
78 
79 }  // namespace
80 
81 // static
EncodeHighLevel(WideStringView msg)82 std::optional<WideString> CBC_PDF417HighLevelEncoder::EncodeHighLevel(
83     WideStringView msg) {
84   const ByteString bytes = FX_UTF8Encode(msg);
85   size_t len = bytes.GetLength();
86   WideString result;
87   result.Reserve(len);
88   for (size_t i = 0; i < len; i++) {
89     wchar_t ch = bytes[i] & 0xff;
90     if (ch == '?' && bytes[i] != '?')
91       return std::nullopt;
92 
93     result += ch;
94   }
95   len = result.GetLength();
96   WideString sb;
97   sb.Reserve(len);
98   size_t p = 0;
99   SubMode textSubMode = SubMode::kAlpha;
100   EncodingMode encodingMode = EncodingMode::kUnknown;
101   while (p < len) {
102     size_t n = DetermineConsecutiveDigitCount(result, p);
103     if (n >= 13) {
104       sb += kLatchToNumeric;
105       encodingMode = EncodingMode::kNumeric;
106       textSubMode = SubMode::kAlpha;
107       EncodeNumeric(result, p, n, &sb);
108       p += n;
109     } else {
110       size_t t = DetermineConsecutiveTextCount(result, p);
111       if (t >= 5 || n == len) {
112         if (encodingMode != EncodingMode::kText) {
113           sb += kLatchToText;
114           encodingMode = EncodingMode::kText;
115           textSubMode = SubMode::kAlpha;
116         }
117         textSubMode = EncodeText(result, p, t, textSubMode, &sb);
118         p += t;
119       } else {
120         std::optional<size_t> b =
121             DetermineConsecutiveBinaryCount(result, bytes.unsigned_span(), p);
122         if (!b.has_value())
123           return std::nullopt;
124 
125         size_t b_value = b.value();
126         if (b_value == 0)
127           b_value = 1;
128         if (b_value == 1 && encodingMode == EncodingMode::kText) {
129           EncodeBinary(bytes.unsigned_span(), p, 1, EncodingMode::kText, &sb);
130         } else {
131           EncodeBinary(bytes.unsigned_span(), p, b_value, encodingMode, &sb);
132           encodingMode = EncodingMode::kByte;
133           textSubMode = SubMode::kAlpha;
134         }
135         p += b_value;
136       }
137     }
138   }
139   return sb;
140 }
141 
EncodeText(const WideString & msg,size_t startpos,size_t count,SubMode initialSubmode,WideString * sb)142 CBC_PDF417HighLevelEncoder::SubMode CBC_PDF417HighLevelEncoder::EncodeText(
143     const WideString& msg,
144     size_t startpos,
145     size_t count,
146     SubMode initialSubmode,
147     WideString* sb) {
148   WideString tmp;
149   tmp.Reserve(count);
150   SubMode submode = initialSubmode;
151   size_t idx = 0;
152   while (idx < count) {
153     wchar_t ch = msg[startpos + idx];
154     switch (submode) {
155       case SubMode::kAlpha:
156         if (IsAlphaUpperOrSpace(ch)) {
157           if (ch == ' ')
158             tmp += 26;
159           else
160             tmp += ch - 65;
161           break;
162         }
163         if (IsAlphaLowerOrSpace(ch)) {
164           submode = SubMode::kLower;
165           tmp += 27;
166           continue;
167         }
168         if (IsMixed(ch)) {
169           submode = SubMode::kMixed;
170           tmp += 28;
171           continue;
172         }
173         if (IsPunctuation(ch)) {
174           tmp += 29;
175           tmp += kPunctuation[ch];
176         }
177         break;
178       case SubMode::kLower:
179         if (IsAlphaLowerOrSpace(ch)) {
180           if (ch == ' ')
181             tmp += 26;
182           else
183             tmp += ch - 97;
184           break;
185         }
186         if (IsAlphaUpperOrSpace(ch)) {
187           tmp += 27;
188           tmp += ch - 65;
189           break;
190         }
191         if (IsMixed(ch)) {
192           submode = SubMode::kMixed;
193           tmp += 28;
194           continue;
195         }
196         if (IsPunctuation(ch)) {
197           tmp += 29;
198           tmp += kPunctuation[ch];
199         }
200         break;
201       case SubMode::kMixed:
202         if (IsMixed(ch)) {
203           tmp += kMixed[ch];
204           break;
205         }
206         if (IsAlphaUpperOrSpace(ch)) {
207           submode = SubMode::kAlpha;
208           tmp += 28;
209           continue;
210         }
211         if (IsAlphaLowerOrSpace(ch)) {
212           submode = SubMode::kLower;
213           tmp += 27;
214           continue;
215         }
216         if (startpos + idx + 1 < count) {
217           wchar_t next = msg[startpos + idx + 1];
218           if (IsPunctuation(next)) {
219             submode = SubMode::kPunctuation;
220             tmp += 25;
221             continue;
222           }
223         }
224         if (IsPunctuation(ch)) {
225           tmp += 29;
226           tmp += kPunctuation[ch];
227         }
228         break;
229       default:
230         if (IsPunctuation(ch)) {
231           tmp += kPunctuation[ch];
232           break;
233         }
234         submode = SubMode::kAlpha;
235         tmp += 29;
236         continue;
237     }
238     ++idx;
239   }
240   wchar_t h = 0;
241   size_t len = tmp.GetLength();
242   for (size_t i = 0; i < len; i++) {
243     bool odd = (i % 2) != 0;
244     if (odd) {
245       h = (h * 30) + tmp[i];
246       *sb += h;
247     } else {
248       h = tmp[i];
249     }
250   }
251   if ((len % 2) != 0)
252     *sb += (h * 30) + 29;
253   return submode;
254 }
255 
EncodeBinary(pdfium::span<const uint8_t> bytes,size_t startpos,size_t count,EncodingMode startmode,WideString * sb)256 void CBC_PDF417HighLevelEncoder::EncodeBinary(pdfium::span<const uint8_t> bytes,
257                                               size_t startpos,
258                                               size_t count,
259                                               EncodingMode startmode,
260                                               WideString* sb) {
261   if (count == 1 && startmode == EncodingMode::kText)
262     *sb += kShiftToByte;
263 
264   size_t idx = startpos;
265   if (count >= 6) {
266     *sb += kLatchToByte;
267     std::array<wchar_t, 5> chars;
268     while ((startpos + count - idx) >= 6) {
269       int64_t t = 0;
270       for (size_t i = 0; i < 6; i++) {
271         t <<= 8;
272         t += bytes[idx + i] & 0xff;
273       }
274       for (size_t i = 0; i < 5; i++) {
275         chars[i] = (t % 900);
276         t /= 900;
277       }
278       for (size_t i = 5; i >= 1; i--)
279         *sb += (chars[i - 1]);
280       idx += 6;
281     }
282   }
283   if (idx < startpos + count)
284     *sb += kLatchToBytePadded;
285   for (size_t i = idx; i < startpos + count; i++) {
286     int32_t ch = bytes[i] & 0xff;
287     *sb += ch;
288   }
289 }
290 
EncodeNumeric(const WideString & msg,size_t startpos,size_t count,WideString * sb)291 void CBC_PDF417HighLevelEncoder::EncodeNumeric(const WideString& msg,
292                                                size_t startpos,
293                                                size_t count,
294                                                WideString* sb) {
295   size_t idx = 0;
296   BigInteger num900 = 900;
297   while (idx < count) {
298     WideString tmp;
299     size_t len = 44 < count - idx ? 44 : count - idx;
300     ByteString part = (L'1' + msg.Substr(startpos + idx, len)).ToUTF8();
301     BigInteger bigint = stringToBigInteger(part.c_str());
302     do {
303       int32_t c = (bigint % num900).toInt();
304       tmp += c;
305       bigint = bigint / num900;
306     } while (!bigint.isZero());
307     for (size_t i = tmp.GetLength(); i >= 1; i--)
308       *sb += tmp[i - 1];
309     idx += len;
310   }
311 }
312 
DetermineConsecutiveDigitCount(WideString msg,size_t startpos)313 size_t CBC_PDF417HighLevelEncoder::DetermineConsecutiveDigitCount(
314     WideString msg,
315     size_t startpos) {
316   size_t count = 0;
317   size_t len = msg.GetLength();
318   size_t idx = startpos;
319   if (idx < len) {
320     wchar_t ch = msg[idx];
321     while (FXSYS_IsDecimalDigit(ch) && idx < len) {
322       count++;
323       idx++;
324       if (idx < len)
325         ch = msg[idx];
326     }
327   }
328   return count;
329 }
330 
DetermineConsecutiveTextCount(WideString msg,size_t startpos)331 size_t CBC_PDF417HighLevelEncoder::DetermineConsecutiveTextCount(
332     WideString msg,
333     size_t startpos) {
334   size_t len = msg.GetLength();
335   size_t idx = startpos;
336   while (idx < len) {
337     wchar_t ch = msg[idx];
338     size_t numericCount = 0;
339     while (numericCount < 13 && FXSYS_IsDecimalDigit(ch) && idx < len) {
340       numericCount++;
341       idx++;
342       if (idx < len)
343         ch = msg[idx];
344     }
345     if (numericCount >= 13)
346       return idx - startpos - numericCount;
347     if (numericCount > 0)
348       continue;
349     ch = msg[idx];
350     if (!IsText(ch))
351       break;
352     idx++;
353   }
354   return idx - startpos;
355 }
356 
357 std::optional<size_t>
DetermineConsecutiveBinaryCount(WideString msg,pdfium::span<const uint8_t> bytes,size_t startpos)358 CBC_PDF417HighLevelEncoder::DetermineConsecutiveBinaryCount(
359     WideString msg,
360     pdfium::span<const uint8_t> bytes,
361     size_t startpos) {
362   size_t len = msg.GetLength();
363   size_t idx = startpos;
364   while (idx < len) {
365     wchar_t ch = msg[idx];
366     size_t numericCount = 0;
367     while (numericCount < 13 && FXSYS_IsDecimalDigit(ch)) {
368       numericCount++;
369       size_t i = idx + numericCount;
370       if (i >= len)
371         break;
372       ch = msg[i];
373     }
374     if (numericCount >= 13)
375       return idx - startpos;
376 
377     size_t textCount = 0;
378     while (textCount < 5 && IsText(ch)) {
379       textCount++;
380       size_t i = idx + textCount;
381       if (i >= len)
382         break;
383       ch = msg[i];
384     }
385     if (textCount >= 5)
386       return idx - startpos;
387     ch = msg[idx];
388     if (bytes[idx] == 63 && ch != '?')
389       return std::nullopt;
390     idx++;
391   }
392   return idx - startpos;
393 }
394