• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright 2014 PDFium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
6 // Original code is licensed as follows:
7 /*
8  * Copyright 2006 Jeremias Maerki in part, and ZXing Authors in part
9  *
10  * Licensed under the Apache License, Version 2.0 (the "License");
11  * you may not use this file except in compliance with the License.
12  * You may obtain a copy of the License at
13  *
14  * http://www.apache.org/licenses/LICENSE-2.0
15  *
16  * Unless required by applicable law or agreed to in writing, software
17  * distributed under the License is distributed on an "AS IS" BASIS,
18  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
19  * See the License for the specific language governing permissions and
20  * limitations under the License.
21  */
22 
23 #include "xfa/fxbarcode/pdf417/BC_PDF417HighLevelEncoder.h"
24 
25 #include "third_party/bigint/BigIntegerLibrary.hh"
26 #include "xfa/fxbarcode/BC_UtilCodingConvert.h"
27 #include "xfa/fxbarcode/pdf417/BC_PDF417Compaction.h"
28 #include "xfa/fxbarcode/utils.h"
29 
// Text-compaction submode identifiers used as the |submode| state in
// encodeText(). The fourth submode, SUBMODE_PUNCTUATION (value 3), is a
// static class member defined further down in this file rather than a macro.
30 #define SUBMODE_ALPHA 0
31 #define SUBMODE_LOWER 1
32 #define SUBMODE_MIXED 2
33 
// Encoding-mode identifiers. encodeHighLevel() tracks the current mode with
// these values and passes them to encodeBinary() as |startmode|.
34 int32_t CBC_PDF417HighLevelEncoder::TEXT_COMPACTION = 0;
35 int32_t CBC_PDF417HighLevelEncoder::BYTE_COMPACTION = 1;
36 int32_t CBC_PDF417HighLevelEncoder::NUMERIC_COMPACTION = 2;
// Fourth text submode; continues the SUBMODE_* macro numbering (0..2).
37 int32_t CBC_PDF417HighLevelEncoder::SUBMODE_PUNCTUATION = 3;
// Mode latch / shift codewords. They are appended to the output string as
// single FX_WCHAR values by the encode*() functions.
38 int32_t CBC_PDF417HighLevelEncoder::LATCH_TO_TEXT = 900;
39 int32_t CBC_PDF417HighLevelEncoder::LATCH_TO_BYTE_PADDED = 901;
40 int32_t CBC_PDF417HighLevelEncoder::LATCH_TO_NUMERIC = 902;
41 int32_t CBC_PDF417HighLevelEncoder::SHIFT_TO_BYTE = 913;
42 int32_t CBC_PDF417HighLevelEncoder::LATCH_TO_BYTE = 924;
// Character codes of the Mixed submode, listed in codeword order: the array
// index is the value Inverse() stores in MIXED[]. Entries equal to 0 are
// skipped by Inverse().
43 uint8_t CBC_PDF417HighLevelEncoder::TEXT_MIXED_RAW[] = {
44     48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 38, 13, 9, 44, 58,
45     35, 45, 46, 36, 47, 43, 37, 42, 61, 94, 0,  32, 0, 0,  0};
// Character codes of the Punctuation submode, listed in codeword order: the
// array index is the value Inverse() stores in PUNCTUATION[]. Entries equal
// to 0 are skipped by Inverse().
46 uint8_t CBC_PDF417HighLevelEncoder::TEXT_PUNCTUATION_RAW[] = {
47     59, 60, 62, 64, 91, 92, 93,  95, 96, 126, 33, 13,  9,   44, 58,
48     10, 45, 46, 36, 47, 34, 124, 42, 40, 41,  63, 123, 125, 39, 0};
// Reverse lookup tables (character code -> codeword value) with 128 entries
// each. They are zero-initialized here; Inverse() resets every entry to -1
// ("not in this submode") and then fills them from the *_RAW tables above.
49 int32_t CBC_PDF417HighLevelEncoder::MIXED[128] = {0};
50 int32_t CBC_PDF417HighLevelEncoder::PUNCTUATION[128] = {0};
51 
Initialize()52 void CBC_PDF417HighLevelEncoder::Initialize() {
53   Inverse();
54 }
55 
Finalize()56 void CBC_PDF417HighLevelEncoder::Finalize() {}
57 
encodeHighLevel(CFX_WideString wideMsg,Compaction compaction,int32_t & e)58 CFX_WideString CBC_PDF417HighLevelEncoder::encodeHighLevel(
59     CFX_WideString wideMsg,
60     Compaction compaction,
61     int32_t& e) {
62   CFX_ByteString bytes;
63   CBC_UtilCodingConvert::UnicodeToUTF8(wideMsg, bytes);
64   CFX_WideString msg;
65   int32_t len = bytes.GetLength();
66   for (int32_t i = 0; i < len; i++) {
67     FX_WCHAR ch = (FX_WCHAR)(bytes.GetAt(i) & 0xff);
68     if (ch == '?' && bytes.GetAt(i) != '?') {
69       e = BCExceptionCharactersOutsideISO88591Encoding;
70       return CFX_WideString();
71     }
72     msg += ch;
73   }
74   CFX_ArrayTemplate<uint8_t> byteArr;
75   for (int32_t k = 0; k < bytes.GetLength(); k++) {
76     byteArr.Add(bytes.GetAt(k));
77   }
78   CFX_WideString sb;
79   len = msg.GetLength();
80   int32_t p = 0;
81   int32_t textSubMode = SUBMODE_ALPHA;
82   if (compaction == TEXT) {
83     encodeText(msg, p, len, sb, textSubMode);
84   } else if (compaction == BYTES) {
85     encodeBinary(&byteArr, p, byteArr.GetSize(), BYTE_COMPACTION, sb);
86   } else if (compaction == NUMERIC) {
87     sb += (FX_WCHAR)LATCH_TO_NUMERIC;
88     encodeNumeric(msg, p, len, sb);
89   } else {
90     int32_t encodingMode = LATCH_TO_TEXT;
91     while (p < len) {
92       int32_t n = determineConsecutiveDigitCount(msg, p);
93       if (n >= 13) {
94         sb += (FX_WCHAR)LATCH_TO_NUMERIC;
95         encodingMode = NUMERIC_COMPACTION;
96         textSubMode = SUBMODE_ALPHA;
97         encodeNumeric(msg, p, n, sb);
98         p += n;
99       } else {
100         int32_t t = determineConsecutiveTextCount(msg, p);
101         if (t >= 5 || n == len) {
102           if (encodingMode != TEXT_COMPACTION) {
103             sb += (FX_WCHAR)LATCH_TO_TEXT;
104             encodingMode = TEXT_COMPACTION;
105             textSubMode = SUBMODE_ALPHA;
106           }
107           textSubMode = encodeText(msg, p, t, sb, textSubMode);
108           p += t;
109         } else {
110           int32_t b = determineConsecutiveBinaryCount(msg, &byteArr, p, e);
111           if (e != BCExceptionNO)
112             return L" ";
113           if (b == 0) {
114             b = 1;
115           }
116           if (b == 1 && encodingMode == TEXT_COMPACTION) {
117             encodeBinary(&byteArr, p, 1, TEXT_COMPACTION, sb);
118           } else {
119             encodeBinary(&byteArr, p, b, encodingMode, sb);
120             encodingMode = BYTE_COMPACTION;
121             textSubMode = SUBMODE_ALPHA;
122           }
123           p += b;
124         }
125       }
126     }
127   }
128   return sb;
129 }
130 
Inverse()131 void CBC_PDF417HighLevelEncoder::Inverse() {
132   for (size_t l = 0; l < FX_ArraySize(MIXED); ++l)
133     MIXED[l] = -1;
134 
135   for (uint8_t i = 0; i < FX_ArraySize(TEXT_MIXED_RAW); ++i) {
136     uint8_t b = TEXT_MIXED_RAW[i];
137     if (b != 0)
138       MIXED[b] = i;
139   }
140 
141   for (size_t l = 0; l < FX_ArraySize(PUNCTUATION); ++l)
142     PUNCTUATION[l] = -1;
143 
144   for (uint8_t i = 0; i < FX_ArraySize(TEXT_PUNCTUATION_RAW); ++i) {
145     uint8_t b = TEXT_PUNCTUATION_RAW[i];
146     if (b != 0)
147       PUNCTUATION[b] = i;
148   }
149 }
150 
encodeText(CFX_WideString msg,int32_t startpos,int32_t count,CFX_WideString & sb,int32_t initialSubmode)151 int32_t CBC_PDF417HighLevelEncoder::encodeText(CFX_WideString msg,
152                                                int32_t startpos,
153                                                int32_t count,
154                                                CFX_WideString& sb,
155                                                int32_t initialSubmode) {
156   CFX_WideString tmp;
157   int32_t submode = initialSubmode;
158   int32_t idx = 0;
159   while (true) {
160     FX_WCHAR ch = msg.GetAt(startpos + idx);
161     switch (submode) {
162       case SUBMODE_ALPHA:
163         if (isAlphaUpper(ch)) {
164           if (ch == ' ') {
165             tmp += (FX_WCHAR)26;
166           } else {
167             tmp += (FX_WCHAR)(ch - 65);
168           }
169         } else {
170           if (isAlphaLower(ch)) {
171             submode = SUBMODE_LOWER;
172             tmp += (FX_WCHAR)27;
173             continue;
174           } else if (isMixed(ch)) {
175             submode = SUBMODE_MIXED;
176             tmp += (FX_WCHAR)28;
177             continue;
178           } else {
179             tmp += (FX_WCHAR)29;
180             tmp += PUNCTUATION[ch];
181             break;
182           }
183         }
184         break;
185       case SUBMODE_LOWER:
186         if (isAlphaLower(ch)) {
187           if (ch == ' ') {
188             tmp += (FX_WCHAR)26;
189           } else {
190             tmp += (FX_WCHAR)(ch - 97);
191           }
192         } else {
193           if (isAlphaUpper(ch)) {
194             tmp += (FX_WCHAR)27;
195             tmp += (FX_WCHAR)(ch - 65);
196             break;
197           } else if (isMixed(ch)) {
198             submode = SUBMODE_MIXED;
199             tmp += (FX_WCHAR)28;
200             continue;
201           } else {
202             tmp += (FX_WCHAR)29;
203             tmp += PUNCTUATION[ch];
204             break;
205           }
206         }
207         break;
208       case SUBMODE_MIXED:
209         if (isMixed(ch)) {
210           tmp += MIXED[ch];
211         } else {
212           if (isAlphaUpper(ch)) {
213             submode = SUBMODE_ALPHA;
214             tmp += (FX_WCHAR)28;
215             continue;
216           } else if (isAlphaLower(ch)) {
217             submode = SUBMODE_LOWER;
218             tmp += (FX_WCHAR)27;
219             continue;
220           } else {
221             if (startpos + idx + 1 < count) {
222               FX_WCHAR next = msg.GetAt(startpos + idx + 1);
223               if (isPunctuation(next)) {
224                 submode = SUBMODE_PUNCTUATION;
225                 tmp += (FX_WCHAR)25;
226                 continue;
227               }
228             }
229             tmp += (FX_WCHAR)29;
230             tmp += PUNCTUATION[ch];
231           }
232         }
233         break;
234       default:
235         if (isPunctuation(ch)) {
236           tmp += PUNCTUATION[ch];
237         } else {
238           submode = SUBMODE_ALPHA;
239           tmp += (FX_WCHAR)29;
240           continue;
241         }
242     }
243     idx++;
244     if (idx >= count) {
245       break;
246     }
247   }
248   FX_WCHAR h = 0;
249   int32_t len = tmp.GetLength();
250   for (int32_t i = 0; i < len; i++) {
251     bool odd = (i % 2) != 0;
252     if (odd) {
253       h = (FX_WCHAR)((h * 30) + tmp.GetAt(i));
254       sb += h;
255     } else {
256       h = tmp.GetAt(i);
257     }
258   }
259   if ((len % 2) != 0) {
260     sb += (FX_WCHAR)((h * 30) + 29);
261   }
262   return submode;
263 }
encodeBinary(CFX_ArrayTemplate<uint8_t> * bytes,int32_t startpos,int32_t count,int32_t startmode,CFX_WideString & sb)264 void CBC_PDF417HighLevelEncoder::encodeBinary(CFX_ArrayTemplate<uint8_t>* bytes,
265                                               int32_t startpos,
266                                               int32_t count,
267                                               int32_t startmode,
268                                               CFX_WideString& sb) {
269   if (count == 1 && startmode == TEXT_COMPACTION) {
270     sb += (FX_WCHAR)SHIFT_TO_BYTE;
271   }
272   int32_t idx = startpos;
273   int32_t i = 0;
274   if (count >= 6) {
275     sb += (FX_WCHAR)LATCH_TO_BYTE;
276     FX_WCHAR chars[5];
277     while ((startpos + count - idx) >= 6) {
278       int64_t t = 0;
279       for (i = 0; i < 6; i++) {
280         t <<= 8;
281         t += bytes->GetAt(idx + i) & 0xff;
282       }
283       for (i = 0; i < 5; i++) {
284         chars[i] = (FX_WCHAR)(t % 900);
285         t /= 900;
286       }
287       for (i = 4; i >= 0; i--) {
288         sb += (chars[i]);
289       }
290       idx += 6;
291     }
292   }
293   if (idx < startpos + count) {
294     sb += (FX_WCHAR)LATCH_TO_BYTE_PADDED;
295   }
296   for (i = idx; i < startpos + count; i++) {
297     int32_t ch = bytes->GetAt(i) & 0xff;
298     sb += (FX_WCHAR)ch;
299   }
300 }
encodeNumeric(CFX_WideString msg,int32_t startpos,int32_t count,CFX_WideString & sb)301 void CBC_PDF417HighLevelEncoder::encodeNumeric(CFX_WideString msg,
302                                                int32_t startpos,
303                                                int32_t count,
304                                                CFX_WideString& sb) {
305   int32_t idx = 0;
306   BigInteger num900 = 900;
307   while (idx < count) {
308     CFX_WideString tmp;
309     int32_t len = 44 < count - idx ? 44 : count - idx;
310     CFX_ByteString part =
311         ((FX_WCHAR)'1' + msg.Mid(startpos + idx, len)).UTF8Encode();
312     BigInteger bigint = stringToBigInteger(part.c_str());
313     do {
314       int32_t c = (bigint % num900).toInt();
315       tmp += (FX_WCHAR)(c);
316       bigint = bigint / num900;
317     } while (!bigint.isZero());
318     for (int32_t i = tmp.GetLength() - 1; i >= 0; i--) {
319       sb += tmp.GetAt(i);
320     }
321     idx += len;
322   }
323 }
isDigit(FX_WCHAR ch)324 bool CBC_PDF417HighLevelEncoder::isDigit(FX_WCHAR ch) {
325   return ch >= '0' && ch <= '9';
326 }
isAlphaUpper(FX_WCHAR ch)327 bool CBC_PDF417HighLevelEncoder::isAlphaUpper(FX_WCHAR ch) {
328   return ch == ' ' || (ch >= 'A' && ch <= 'Z');
329 }
isAlphaLower(FX_WCHAR ch)330 bool CBC_PDF417HighLevelEncoder::isAlphaLower(FX_WCHAR ch) {
331   return ch == ' ' || (ch >= 'a' && ch <= 'z');
332 }
isMixed(FX_WCHAR ch)333 bool CBC_PDF417HighLevelEncoder::isMixed(FX_WCHAR ch) {
334   return MIXED[ch] != -1;
335 }
isPunctuation(FX_WCHAR ch)336 bool CBC_PDF417HighLevelEncoder::isPunctuation(FX_WCHAR ch) {
337   return PUNCTUATION[ch] != -1;
338 }
isText(FX_WCHAR ch)339 bool CBC_PDF417HighLevelEncoder::isText(FX_WCHAR ch) {
340   return ch == '\t' || ch == '\n' || ch == '\r' || (ch >= 32 && ch <= 126);
341 }
determineConsecutiveDigitCount(CFX_WideString msg,int32_t startpos)342 int32_t CBC_PDF417HighLevelEncoder::determineConsecutiveDigitCount(
343     CFX_WideString msg,
344     int32_t startpos) {
345   int32_t count = 0;
346   int32_t len = msg.GetLength();
347   int32_t idx = startpos;
348   if (idx < len) {
349     FX_WCHAR ch = msg.GetAt(idx);
350     while (isDigit(ch) && idx < len) {
351       count++;
352       idx++;
353       if (idx < len) {
354         ch = msg.GetAt(idx);
355       }
356     }
357   }
358   return count;
359 }
determineConsecutiveTextCount(CFX_WideString msg,int32_t startpos)360 int32_t CBC_PDF417HighLevelEncoder::determineConsecutiveTextCount(
361     CFX_WideString msg,
362     int32_t startpos) {
363   int32_t len = msg.GetLength();
364   int32_t idx = startpos;
365   while (idx < len) {
366     FX_WCHAR ch = msg.GetAt(idx);
367     int32_t numericCount = 0;
368     while (numericCount < 13 && isDigit(ch) && idx < len) {
369       numericCount++;
370       idx++;
371       if (idx < len) {
372         ch = msg.GetAt(idx);
373       }
374     }
375     if (numericCount >= 13) {
376       return idx - startpos - numericCount;
377     }
378     if (numericCount > 0) {
379       continue;
380     }
381     ch = msg.GetAt(idx);
382     if (!isText(ch)) {
383       break;
384     }
385     idx++;
386   }
387   return idx - startpos;
388 }
determineConsecutiveBinaryCount(CFX_WideString msg,CFX_ArrayTemplate<uint8_t> * bytes,int32_t startpos,int32_t & e)389 int32_t CBC_PDF417HighLevelEncoder::determineConsecutiveBinaryCount(
390     CFX_WideString msg,
391     CFX_ArrayTemplate<uint8_t>* bytes,
392     int32_t startpos,
393     int32_t& e) {
394   int32_t len = msg.GetLength();
395   int32_t idx = startpos;
396   while (idx < len) {
397     FX_WCHAR ch = msg.GetAt(idx);
398     int32_t numericCount = 0;
399     while (numericCount < 13 && isDigit(ch)) {
400       numericCount++;
401       int32_t i = idx + numericCount;
402       if (i >= len) {
403         break;
404       }
405       ch = msg.GetAt(i);
406     }
407     if (numericCount >= 13) {
408       return idx - startpos;
409     }
410     int32_t textCount = 0;
411     while (textCount < 5 && isText(ch)) {
412       textCount++;
413       int32_t i = idx + textCount;
414       if (i >= len) {
415         break;
416       }
417       ch = msg.GetAt(i);
418     }
419     if (textCount >= 5) {
420       return idx - startpos;
421     }
422     ch = msg.GetAt(idx);
423     if (bytes->GetAt(idx) == 63 && ch != '?') {
424       e = BCExceptionNonEncodableCharacterDetected;
425       return -1;
426     }
427     idx++;
428   }
429   return idx - startpos;
430 }
431