• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright 2014 PDFium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
6 // Original code is licensed as follows:
7 /*
8  * Copyright 2006 Jeremias Maerki in part, and ZXing Authors in part
9  *
10  * Licensed under the Apache License, Version 2.0 (the "License");
11  * you may not use this file except in compliance with the License.
12  * You may obtain a copy of the License at
13  *
14  * http://www.apache.org/licenses/LICENSE-2.0
15  *
16  * Unless required by applicable law or agreed to in writing, software
17  * distributed under the License is distributed on an "AS IS" BASIS,
18  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
19  * See the License for the specific language governing permissions and
20  * limitations under the License.
21  */
22 
23 #include "fxbarcode/pdf417/BC_PDF417HighLevelEncoder.h"
24 
25 #include "core/fxcrt/fx_extension.h"
26 #include "third_party/bigint/BigIntegerLibrary.hh"
27 
28 namespace {
29 
// Mode-switching codewords appended to the output by the encoder below.
// Switch to text compaction; emitted before a run handled by EncodeText().
constexpr int16_t kLatchToText{900};
// Switch to byte compaction; emitted by EncodeBinary() when bytes remain that
// do not fill a complete group of six.
constexpr int16_t kLatchToBytePadded{901};
// Switch to numeric compaction; emitted before a run handled by
// EncodeNumeric().
constexpr int16_t kLatchToNumeric{902};
// Emitted by EncodeBinary() for a single byte encountered in text mode.
constexpr int16_t kShiftToByte{913};
// Switch to byte compaction; emitted by EncodeBinary() ahead of bytes that are
// packed six at a time.
constexpr int16_t kLatchToByte{924};
35 
// Lookup table indexed by a 7-bit character code. Each entry is the value
// EncodeText() emits for that character while in the "mixed" sub-mode, or -1
// when the character has no mixed-sub-mode value (see IsMixed()).
constexpr int8_t kMixed[128] = {
    // 0x00-0x0F: '\t' and '\r'.
    -1, -1, -1, -1, -1, -1, -1, -1, -1, 12, -1, -1, -1, 11, -1, -1,
    // 0x10-0x1F.
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    // 0x20-0x2F: space # $ % & * + , - . /
    26, -1, -1, 15, 18, 21, 10, -1, -1, -1, 22, 20, 13, 16, 17, 19,
    // 0x30-0x3F: digits, ':' and '='.
    0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  14, -1, -1, 23, -1, -1,
    // 0x40-0x4F.
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    // 0x50-0x5F: '^'.
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 24, -1,
    // 0x60-0x6F.
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    // 0x70-0x7F.
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1};
44 
// Lookup table indexed by a 7-bit character code. Each entry is the value
// EncodeText() emits for that character in the "punctuation" sub-mode, or -1
// when the character has no punctuation-sub-mode value (see IsPunctuation()).
constexpr int8_t kPunctuation[128] = {
    // 0x00-0x0F: '\t', '\n' and '\r'.
    -1, -1, -1, -1, -1, -1, -1, -1, -1, 12, 15, -1, -1, 11, -1, -1,
    // 0x10-0x1F.
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    // 0x20-0x2F: ! " $ ' ( ) * , - . /
    -1, 10, 20, -1, 18, -1, -1, 28, 23, 24, 22, -1, 13, 16, 17, 19,
    // 0x30-0x3F: : ; < > ?
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 14, 0,  1,  -1, 2,  25,
    // 0x40-0x4F: @
    3,  -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    // 0x50-0x5F: [ \ ] _
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 4,  5,  6,  -1, 7,
    // 0x60-0x6F: `
    8,  -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    // 0x70-0x7F: { | } ~
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 26, 21, 27, 9,  -1};
53 
// Returns true when |ch| is a space or one of the letters 'A'..'Z'.
bool IsAlphaUpperOrSpace(wchar_t ch) {
  if (ch == ' ')
    return true;
  return 'A' <= ch && ch <= 'Z';
}
57 
// Returns true when |ch| is a space or one of the letters 'a'..'z'.
bool IsAlphaLowerOrSpace(wchar_t ch) {
  if (ch == ' ')
    return true;
  return 'a' <= ch && ch <= 'z';
}
61 
IsMixed(wchar_t ch)62 bool IsMixed(wchar_t ch) {
63   // Bounds check avoiding sign mismatch error given questionable signedness.
64   return !((ch & ~0x7F) || kMixed[ch] == -1);
65 }
66 
IsPunctuation(wchar_t ch)67 bool IsPunctuation(wchar_t ch) {
68   // Bounds check avoiding sign mismatch error given questionable signedness.
69   return !((ch & ~0x7F) || kPunctuation[ch] == -1);
70 }
71 
// Returns true when |ch| is a tab, line feed, carriage return, or a character
// code in the range 32..126 inclusive.
bool IsText(wchar_t ch) {
  if (ch == '\t' || ch == '\n' || ch == '\r')
    return true;
  return 32 <= ch && ch <= 126;
}
75 
76 }  // namespace
77 
78 // static
EncodeHighLevel(WideStringView msg)79 Optional<WideString> CBC_PDF417HighLevelEncoder::EncodeHighLevel(
80     WideStringView msg) {
81   ByteString bytes = FX_UTF8Encode(msg);
82   size_t len = bytes.GetLength();
83   WideString result;
84   result.Reserve(len);
85   for (size_t i = 0; i < len; i++) {
86     wchar_t ch = bytes[i] & 0xff;
87     if (ch == '?' && bytes[i] != '?')
88       return {};
89 
90     result += ch;
91   }
92   std::vector<uint8_t> byteArr(bytes.begin(), bytes.end());
93   len = result.GetLength();
94   WideString sb;
95   sb.Reserve(len);
96   size_t p = 0;
97   SubMode textSubMode = SubMode::kAlpha;
98   EncodingMode encodingMode = EncodingMode::kUnknown;
99   while (p < len) {
100     size_t n = DetermineConsecutiveDigitCount(result, p);
101     if (n >= 13) {
102       sb += kLatchToNumeric;
103       encodingMode = EncodingMode::kNumeric;
104       textSubMode = SubMode::kAlpha;
105       EncodeNumeric(result, p, n, &sb);
106       p += n;
107     } else {
108       size_t t = DetermineConsecutiveTextCount(result, p);
109       if (t >= 5 || n == len) {
110         if (encodingMode != EncodingMode::kText) {
111           sb += kLatchToText;
112           encodingMode = EncodingMode::kText;
113           textSubMode = SubMode::kAlpha;
114         }
115         textSubMode = EncodeText(result, p, t, textSubMode, &sb);
116         p += t;
117       } else {
118         Optional<size_t> b =
119             DetermineConsecutiveBinaryCount(result, &byteArr, p);
120         if (!b)
121           return {};
122 
123         size_t b_value = b.value();
124         if (b_value == 0)
125           b_value = 1;
126         if (b_value == 1 && encodingMode == EncodingMode::kText) {
127           EncodeBinary(byteArr, p, 1, EncodingMode::kText, &sb);
128         } else {
129           EncodeBinary(byteArr, p, b_value, encodingMode, &sb);
130           encodingMode = EncodingMode::kByte;
131           textSubMode = SubMode::kAlpha;
132         }
133         p += b_value;
134       }
135     }
136   }
137   return sb;
138 }
139 
EncodeText(const WideString & msg,size_t startpos,size_t count,SubMode initialSubmode,WideString * sb)140 CBC_PDF417HighLevelEncoder::SubMode CBC_PDF417HighLevelEncoder::EncodeText(
141     const WideString& msg,
142     size_t startpos,
143     size_t count,
144     SubMode initialSubmode,
145     WideString* sb) {
146   WideString tmp;
147   tmp.Reserve(count);
148   SubMode submode = initialSubmode;
149   size_t idx = 0;
150   while (idx < count) {
151     wchar_t ch = msg[startpos + idx];
152     switch (submode) {
153       case SubMode::kAlpha:
154         if (IsAlphaUpperOrSpace(ch)) {
155           if (ch == ' ')
156             tmp += 26;
157           else
158             tmp += ch - 65;
159           break;
160         }
161         if (IsAlphaLowerOrSpace(ch)) {
162           submode = SubMode::kLower;
163           tmp += 27;
164           continue;
165         }
166         if (IsMixed(ch)) {
167           submode = SubMode::kMixed;
168           tmp += 28;
169           continue;
170         }
171         if (IsPunctuation(ch)) {
172           tmp += 29;
173           tmp += kPunctuation[ch];
174         }
175         break;
176       case SubMode::kLower:
177         if (IsAlphaLowerOrSpace(ch)) {
178           if (ch == ' ')
179             tmp += 26;
180           else
181             tmp += ch - 97;
182           break;
183         }
184         if (IsAlphaUpperOrSpace(ch)) {
185           tmp += 27;
186           tmp += ch - 65;
187           break;
188         }
189         if (IsMixed(ch)) {
190           submode = SubMode::kMixed;
191           tmp += 28;
192           continue;
193         }
194         if (IsPunctuation(ch)) {
195           tmp += 29;
196           tmp += kPunctuation[ch];
197         }
198         break;
199       case SubMode::kMixed:
200         if (IsMixed(ch)) {
201           tmp += kMixed[ch];
202           break;
203         }
204         if (IsAlphaUpperOrSpace(ch)) {
205           submode = SubMode::kAlpha;
206           tmp += 28;
207           continue;
208         }
209         if (IsAlphaLowerOrSpace(ch)) {
210           submode = SubMode::kLower;
211           tmp += 27;
212           continue;
213         }
214         if (startpos + idx + 1 < count) {
215           wchar_t next = msg[startpos + idx + 1];
216           if (IsPunctuation(next)) {
217             submode = SubMode::kPunctuation;
218             tmp += 25;
219             continue;
220           }
221         }
222         if (IsPunctuation(ch)) {
223           tmp += 29;
224           tmp += kPunctuation[ch];
225         }
226         break;
227       default:
228         if (IsPunctuation(ch)) {
229           tmp += kPunctuation[ch];
230           break;
231         }
232         submode = SubMode::kAlpha;
233         tmp += 29;
234         continue;
235     }
236     ++idx;
237   }
238   wchar_t h = 0;
239   size_t len = tmp.GetLength();
240   for (size_t i = 0; i < len; i++) {
241     bool odd = (i % 2) != 0;
242     if (odd) {
243       h = (h * 30) + tmp[i];
244       *sb += h;
245     } else {
246       h = tmp[i];
247     }
248   }
249   if ((len % 2) != 0)
250     *sb += (h * 30) + 29;
251   return submode;
252 }
253 
EncodeBinary(pdfium::span<const uint8_t> bytes,size_t startpos,size_t count,EncodingMode startmode,WideString * sb)254 void CBC_PDF417HighLevelEncoder::EncodeBinary(pdfium::span<const uint8_t> bytes,
255                                               size_t startpos,
256                                               size_t count,
257                                               EncodingMode startmode,
258                                               WideString* sb) {
259   if (count == 1 && startmode == EncodingMode::kText)
260     *sb += kShiftToByte;
261 
262   size_t idx = startpos;
263   if (count >= 6) {
264     *sb += kLatchToByte;
265     wchar_t chars[5];
266     while ((startpos + count - idx) >= 6) {
267       int64_t t = 0;
268       for (size_t i = 0; i < 6; i++) {
269         t <<= 8;
270         t += bytes[idx + i] & 0xff;
271       }
272       for (size_t i = 0; i < 5; i++) {
273         chars[i] = (t % 900);
274         t /= 900;
275       }
276       for (size_t i = 5; i >= 1; i--)
277         *sb += (chars[i - 1]);
278       idx += 6;
279     }
280   }
281   if (idx < startpos + count)
282     *sb += kLatchToBytePadded;
283   for (size_t i = idx; i < startpos + count; i++) {
284     int32_t ch = bytes[i] & 0xff;
285     *sb += ch;
286   }
287 }
288 
EncodeNumeric(const WideString & msg,size_t startpos,size_t count,WideString * sb)289 void CBC_PDF417HighLevelEncoder::EncodeNumeric(const WideString& msg,
290                                                size_t startpos,
291                                                size_t count,
292                                                WideString* sb) {
293   size_t idx = 0;
294   BigInteger num900 = 900;
295   while (idx < count) {
296     WideString tmp;
297     size_t len = 44 < count - idx ? 44 : count - idx;
298     ByteString part = (L'1' + msg.Substr(startpos + idx, len)).ToUTF8();
299     BigInteger bigint = stringToBigInteger(part.c_str());
300     do {
301       int32_t c = (bigint % num900).toInt();
302       tmp += c;
303       bigint = bigint / num900;
304     } while (!bigint.isZero());
305     for (size_t i = tmp.GetLength(); i >= 1; i--)
306       *sb += tmp[i - 1];
307     idx += len;
308   }
309 }
310 
DetermineConsecutiveDigitCount(WideString msg,size_t startpos)311 size_t CBC_PDF417HighLevelEncoder::DetermineConsecutiveDigitCount(
312     WideString msg,
313     size_t startpos) {
314   size_t count = 0;
315   size_t len = msg.GetLength();
316   size_t idx = startpos;
317   if (idx < len) {
318     wchar_t ch = msg[idx];
319     while (FXSYS_IsDecimalDigit(ch) && idx < len) {
320       count++;
321       idx++;
322       if (idx < len)
323         ch = msg[idx];
324     }
325   }
326   return count;
327 }
328 
DetermineConsecutiveTextCount(WideString msg,size_t startpos)329 size_t CBC_PDF417HighLevelEncoder::DetermineConsecutiveTextCount(
330     WideString msg,
331     size_t startpos) {
332   size_t len = msg.GetLength();
333   size_t idx = startpos;
334   while (idx < len) {
335     wchar_t ch = msg[idx];
336     size_t numericCount = 0;
337     while (numericCount < 13 && FXSYS_IsDecimalDigit(ch) && idx < len) {
338       numericCount++;
339       idx++;
340       if (idx < len)
341         ch = msg[idx];
342     }
343     if (numericCount >= 13)
344       return idx - startpos - numericCount;
345     if (numericCount > 0)
346       continue;
347     ch = msg[idx];
348     if (!IsText(ch))
349       break;
350     idx++;
351   }
352   return idx - startpos;
353 }
354 
DetermineConsecutiveBinaryCount(WideString msg,std::vector<uint8_t> * bytes,size_t startpos)355 Optional<size_t> CBC_PDF417HighLevelEncoder::DetermineConsecutiveBinaryCount(
356     WideString msg,
357     std::vector<uint8_t>* bytes,
358     size_t startpos) {
359   size_t len = msg.GetLength();
360   size_t idx = startpos;
361   while (idx < len) {
362     wchar_t ch = msg[idx];
363     size_t numericCount = 0;
364     while (numericCount < 13 && FXSYS_IsDecimalDigit(ch)) {
365       numericCount++;
366       size_t i = idx + numericCount;
367       if (i >= len)
368         break;
369       ch = msg[i];
370     }
371     if (numericCount >= 13)
372       return idx - startpos;
373 
374     size_t textCount = 0;
375     while (textCount < 5 && IsText(ch)) {
376       textCount++;
377       size_t i = idx + textCount;
378       if (i >= len)
379         break;
380       ch = msg[i];
381     }
382     if (textCount >= 5)
383       return idx - startpos;
384     ch = msg[idx];
385     if ((*bytes)[idx] == 63 && ch != '?')
386       return {};
387     idx++;
388   }
389   return idx - startpos;
390 }
391