• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright 2014 The PDFium Authors
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
6 // Original code is licensed as follows:
7 /*
8  * Copyright 2006 Jeremias Maerki in part, and ZXing Authors in part
9  *
10  * Licensed under the Apache License, Version 2.0 (the "License");
11  * you may not use this file except in compliance with the License.
12  * You may obtain a copy of the License at
13  *
14  * http://www.apache.org/licenses/LICENSE-2.0
15  *
16  * Unless required by applicable law or agreed to in writing, software
17  * distributed under the License is distributed on an "AS IS" BASIS,
18  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
19  * See the License for the specific language governing permissions and
20  * limitations under the License.
21  */
22 
23 #include "fxbarcode/pdf417/BC_PDF417HighLevelEncoder.h"
24 
25 #include "core/fxcrt/fx_extension.h"
26 #include "core/fxcrt/fx_string.h"
27 #include "third_party/bigint/BigIntegerLibrary.hh"
28 
29 namespace {
30 
// Mode-switch codewords. The encoder appends one of these to its output
// before the data codewords whenever it changes (or temporarily shifts) the
// compaction mode.
constexpr int16_t kLatchToText{900};
constexpr int16_t kLatchToBytePadded{901};
constexpr int16_t kLatchToNumeric{902};
constexpr int16_t kShiftToByte{913};
constexpr int16_t kLatchToByte{924};
36 
// Lookup from a 7-bit character code to the value appended for that character
// while in SubMode::kMixed. An entry of -1 means the character has no value in
// this table. Laid out sixteen entries per row; the trailing comment gives the
// index range covered by each row.
constexpr int8_t kMixed[128] = {
    // 0x00 - 0x0F ('\t' = 12, '\r' = 11)
    -1, -1, -1, -1, -1, -1, -1, -1, -1, 12, -1, -1, -1, 11, -1, -1,
    // 0x10 - 0x1F
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    // 0x20 - 0x2F (' ' through '/')
    26, -1, -1, 15, 18, 21, 10, -1, -1, -1, 22, 20, 13, 16, 17, 19,
    // 0x30 - 0x3F ('0' through '?')
    0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 14, -1, -1, 23, -1, -1,
    // 0x40 - 0x4F
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    // 0x50 - 0x5F ('^' = 24)
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 24, -1,
    // 0x60 - 0x6F
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    // 0x70 - 0x7F
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1};
45 
// Lookup from a 7-bit character code to the value appended for that character
// when it is encoded as punctuation. An entry of -1 means the character has no
// value in this table. Laid out sixteen entries per row; the trailing comment
// gives the index range covered by each row.
constexpr int8_t kPunctuation[128] = {
    // 0x00 - 0x0F ('\t' = 12, '\n' = 15, '\r' = 11)
    -1, -1, -1, -1, -1, -1, -1, -1, -1, 12, 15, -1, -1, 11, -1, -1,
    // 0x10 - 0x1F
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    // 0x20 - 0x2F (' ' through '/')
    -1, 10, 20, -1, 18, -1, -1, 28, 23, 24, 22, -1, 13, 16, 17, 19,
    // 0x30 - 0x3F ('0' through '?')
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 14, 0, 1, -1, 2, 25,
    // 0x40 - 0x4F ('@' = 3)
    3, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    // 0x50 - 0x5F ('[' = 4, '\\' = 5, ']' = 6, '_' = 7)
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 4, 5, 6, -1, 7,
    // 0x60 - 0x6F ('`' = 8)
    8, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    // 0x70 - 0x7F ('{' = 26, '|' = 21, '}' = 27, '~' = 9)
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 26, 21, 27, 9, -1};
54 
IsAlphaUpperOrSpace(wchar_t ch)55 bool IsAlphaUpperOrSpace(wchar_t ch) {
56   return ch == ' ' || FXSYS_IsUpperASCII(ch);
57 }
58 
IsAlphaLowerOrSpace(wchar_t ch)59 bool IsAlphaLowerOrSpace(wchar_t ch) {
60   return ch == ' ' || FXSYS_IsLowerASCII(ch);
61 }
62 
IsMixed(wchar_t ch)63 bool IsMixed(wchar_t ch) {
64   // Bounds check avoiding sign mismatch error given questionable signedness.
65   return !((ch & ~0x7F) || kMixed[ch] == -1);
66 }
67 
IsPunctuation(wchar_t ch)68 bool IsPunctuation(wchar_t ch) {
69   // Bounds check avoiding sign mismatch error given questionable signedness.
70   return !((ch & ~0x7F) || kPunctuation[ch] == -1);
71 }
72 
// Returns true for tab, line feed, carriage return, and every character code
// in the inclusive range 32..126.
bool IsText(wchar_t ch) {
  switch (ch) {
    case '\t':
    case '\n':
    case '\r':
      return true;
    default:
      return ch >= 32 && ch <= 126;
  }
}
76 
77 }  // namespace
78 
79 // static
EncodeHighLevel(WideStringView msg)80 absl::optional<WideString> CBC_PDF417HighLevelEncoder::EncodeHighLevel(
81     WideStringView msg) {
82   const ByteString bytes = FX_UTF8Encode(msg);
83   size_t len = bytes.GetLength();
84   WideString result;
85   result.Reserve(len);
86   for (size_t i = 0; i < len; i++) {
87     wchar_t ch = bytes[i] & 0xff;
88     if (ch == '?' && bytes[i] != '?')
89       return absl::nullopt;
90 
91     result += ch;
92   }
93   len = result.GetLength();
94   WideString sb;
95   sb.Reserve(len);
96   size_t p = 0;
97   SubMode textSubMode = SubMode::kAlpha;
98   EncodingMode encodingMode = EncodingMode::kUnknown;
99   while (p < len) {
100     size_t n = DetermineConsecutiveDigitCount(result, p);
101     if (n >= 13) {
102       sb += kLatchToNumeric;
103       encodingMode = EncodingMode::kNumeric;
104       textSubMode = SubMode::kAlpha;
105       EncodeNumeric(result, p, n, &sb);
106       p += n;
107     } else {
108       size_t t = DetermineConsecutiveTextCount(result, p);
109       if (t >= 5 || n == len) {
110         if (encodingMode != EncodingMode::kText) {
111           sb += kLatchToText;
112           encodingMode = EncodingMode::kText;
113           textSubMode = SubMode::kAlpha;
114         }
115         textSubMode = EncodeText(result, p, t, textSubMode, &sb);
116         p += t;
117       } else {
118         absl::optional<size_t> b =
119             DetermineConsecutiveBinaryCount(result, bytes.raw_span(), p);
120         if (!b.has_value())
121           return absl::nullopt;
122 
123         size_t b_value = b.value();
124         if (b_value == 0)
125           b_value = 1;
126         if (b_value == 1 && encodingMode == EncodingMode::kText) {
127           EncodeBinary(bytes.raw_span(), p, 1, EncodingMode::kText, &sb);
128         } else {
129           EncodeBinary(bytes.raw_span(), p, b_value, encodingMode, &sb);
130           encodingMode = EncodingMode::kByte;
131           textSubMode = SubMode::kAlpha;
132         }
133         p += b_value;
134       }
135     }
136   }
137   return sb;
138 }
139 
EncodeText(const WideString & msg,size_t startpos,size_t count,SubMode initialSubmode,WideString * sb)140 CBC_PDF417HighLevelEncoder::SubMode CBC_PDF417HighLevelEncoder::EncodeText(
141     const WideString& msg,
142     size_t startpos,
143     size_t count,
144     SubMode initialSubmode,
145     WideString* sb) {
146   WideString tmp;
147   tmp.Reserve(count);
148   SubMode submode = initialSubmode;
149   size_t idx = 0;
150   while (idx < count) {
151     wchar_t ch = msg[startpos + idx];
152     switch (submode) {
153       case SubMode::kAlpha:
154         if (IsAlphaUpperOrSpace(ch)) {
155           if (ch == ' ')
156             tmp += 26;
157           else
158             tmp += ch - 65;
159           break;
160         }
161         if (IsAlphaLowerOrSpace(ch)) {
162           submode = SubMode::kLower;
163           tmp += 27;
164           continue;
165         }
166         if (IsMixed(ch)) {
167           submode = SubMode::kMixed;
168           tmp += 28;
169           continue;
170         }
171         if (IsPunctuation(ch)) {
172           tmp += 29;
173           tmp += kPunctuation[ch];
174         }
175         break;
176       case SubMode::kLower:
177         if (IsAlphaLowerOrSpace(ch)) {
178           if (ch == ' ')
179             tmp += 26;
180           else
181             tmp += ch - 97;
182           break;
183         }
184         if (IsAlphaUpperOrSpace(ch)) {
185           tmp += 27;
186           tmp += ch - 65;
187           break;
188         }
189         if (IsMixed(ch)) {
190           submode = SubMode::kMixed;
191           tmp += 28;
192           continue;
193         }
194         if (IsPunctuation(ch)) {
195           tmp += 29;
196           tmp += kPunctuation[ch];
197         }
198         break;
199       case SubMode::kMixed:
200         if (IsMixed(ch)) {
201           tmp += kMixed[ch];
202           break;
203         }
204         if (IsAlphaUpperOrSpace(ch)) {
205           submode = SubMode::kAlpha;
206           tmp += 28;
207           continue;
208         }
209         if (IsAlphaLowerOrSpace(ch)) {
210           submode = SubMode::kLower;
211           tmp += 27;
212           continue;
213         }
214         if (startpos + idx + 1 < count) {
215           wchar_t next = msg[startpos + idx + 1];
216           if (IsPunctuation(next)) {
217             submode = SubMode::kPunctuation;
218             tmp += 25;
219             continue;
220           }
221         }
222         if (IsPunctuation(ch)) {
223           tmp += 29;
224           tmp += kPunctuation[ch];
225         }
226         break;
227       default:
228         if (IsPunctuation(ch)) {
229           tmp += kPunctuation[ch];
230           break;
231         }
232         submode = SubMode::kAlpha;
233         tmp += 29;
234         continue;
235     }
236     ++idx;
237   }
238   wchar_t h = 0;
239   size_t len = tmp.GetLength();
240   for (size_t i = 0; i < len; i++) {
241     bool odd = (i % 2) != 0;
242     if (odd) {
243       h = (h * 30) + tmp[i];
244       *sb += h;
245     } else {
246       h = tmp[i];
247     }
248   }
249   if ((len % 2) != 0)
250     *sb += (h * 30) + 29;
251   return submode;
252 }
253 
EncodeBinary(pdfium::span<const uint8_t> bytes,size_t startpos,size_t count,EncodingMode startmode,WideString * sb)254 void CBC_PDF417HighLevelEncoder::EncodeBinary(pdfium::span<const uint8_t> bytes,
255                                               size_t startpos,
256                                               size_t count,
257                                               EncodingMode startmode,
258                                               WideString* sb) {
259   if (count == 1 && startmode == EncodingMode::kText)
260     *sb += kShiftToByte;
261 
262   size_t idx = startpos;
263   if (count >= 6) {
264     *sb += kLatchToByte;
265     wchar_t chars[5];
266     while ((startpos + count - idx) >= 6) {
267       int64_t t = 0;
268       for (size_t i = 0; i < 6; i++) {
269         t <<= 8;
270         t += bytes[idx + i] & 0xff;
271       }
272       for (size_t i = 0; i < 5; i++) {
273         chars[i] = (t % 900);
274         t /= 900;
275       }
276       for (size_t i = 5; i >= 1; i--)
277         *sb += (chars[i - 1]);
278       idx += 6;
279     }
280   }
281   if (idx < startpos + count)
282     *sb += kLatchToBytePadded;
283   for (size_t i = idx; i < startpos + count; i++) {
284     int32_t ch = bytes[i] & 0xff;
285     *sb += ch;
286   }
287 }
288 
EncodeNumeric(const WideString & msg,size_t startpos,size_t count,WideString * sb)289 void CBC_PDF417HighLevelEncoder::EncodeNumeric(const WideString& msg,
290                                                size_t startpos,
291                                                size_t count,
292                                                WideString* sb) {
293   size_t idx = 0;
294   BigInteger num900 = 900;
295   while (idx < count) {
296     WideString tmp;
297     size_t len = 44 < count - idx ? 44 : count - idx;
298     ByteString part = (L'1' + msg.Substr(startpos + idx, len)).ToUTF8();
299     BigInteger bigint = stringToBigInteger(part.c_str());
300     do {
301       int32_t c = (bigint % num900).toInt();
302       tmp += c;
303       bigint = bigint / num900;
304     } while (!bigint.isZero());
305     for (size_t i = tmp.GetLength(); i >= 1; i--)
306       *sb += tmp[i - 1];
307     idx += len;
308   }
309 }
310 
DetermineConsecutiveDigitCount(WideString msg,size_t startpos)311 size_t CBC_PDF417HighLevelEncoder::DetermineConsecutiveDigitCount(
312     WideString msg,
313     size_t startpos) {
314   size_t count = 0;
315   size_t len = msg.GetLength();
316   size_t idx = startpos;
317   if (idx < len) {
318     wchar_t ch = msg[idx];
319     while (FXSYS_IsDecimalDigit(ch) && idx < len) {
320       count++;
321       idx++;
322       if (idx < len)
323         ch = msg[idx];
324     }
325   }
326   return count;
327 }
328 
DetermineConsecutiveTextCount(WideString msg,size_t startpos)329 size_t CBC_PDF417HighLevelEncoder::DetermineConsecutiveTextCount(
330     WideString msg,
331     size_t startpos) {
332   size_t len = msg.GetLength();
333   size_t idx = startpos;
334   while (idx < len) {
335     wchar_t ch = msg[idx];
336     size_t numericCount = 0;
337     while (numericCount < 13 && FXSYS_IsDecimalDigit(ch) && idx < len) {
338       numericCount++;
339       idx++;
340       if (idx < len)
341         ch = msg[idx];
342     }
343     if (numericCount >= 13)
344       return idx - startpos - numericCount;
345     if (numericCount > 0)
346       continue;
347     ch = msg[idx];
348     if (!IsText(ch))
349       break;
350     idx++;
351   }
352   return idx - startpos;
353 }
354 
355 absl::optional<size_t>
DetermineConsecutiveBinaryCount(WideString msg,pdfium::span<const uint8_t> bytes,size_t startpos)356 CBC_PDF417HighLevelEncoder::DetermineConsecutiveBinaryCount(
357     WideString msg,
358     pdfium::span<const uint8_t> bytes,
359     size_t startpos) {
360   size_t len = msg.GetLength();
361   size_t idx = startpos;
362   while (idx < len) {
363     wchar_t ch = msg[idx];
364     size_t numericCount = 0;
365     while (numericCount < 13 && FXSYS_IsDecimalDigit(ch)) {
366       numericCount++;
367       size_t i = idx + numericCount;
368       if (i >= len)
369         break;
370       ch = msg[i];
371     }
372     if (numericCount >= 13)
373       return idx - startpos;
374 
375     size_t textCount = 0;
376     while (textCount < 5 && IsText(ch)) {
377       textCount++;
378       size_t i = idx + textCount;
379       if (i >= len)
380         break;
381       ch = msg[i];
382     }
383     if (textCount >= 5)
384       return idx - startpos;
385     ch = msg[idx];
386     if (bytes[idx] == 63 && ch != '?')
387       return absl::nullopt;
388     idx++;
389   }
390   return idx - startpos;
391 }
392