• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright 2014 PDFium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
6 // Original code is licensed as follows:
7 /*
8  * Copyright 2006 Jeremias Maerki in part, and ZXing Authors in part
9  *
10  * Licensed under the Apache License, Version 2.0 (the "License");
11  * you may not use this file except in compliance with the License.
12  * You may obtain a copy of the License at
13  *
14  * http://www.apache.org/licenses/LICENSE-2.0
15  *
16  * Unless required by applicable law or agreed to in writing, software
17  * distributed under the License is distributed on an "AS IS" BASIS,
18  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
19  * See the License for the specific language governing permissions and
20  * limitations under the License.
21  */
22 
23 #include "fxbarcode/pdf417/BC_PDF417HighLevelEncoder.h"
24 
25 #include "fxbarcode/BC_UtilCodingConvert.h"
26 #include "fxbarcode/pdf417/BC_PDF417Compaction.h"
27 #include "fxbarcode/utils.h"
28 #include "third_party/bigint/BigIntegerLibrary.hh"
29 
30 #define SUBMODE_ALPHA 0
31 #define SUBMODE_LOWER 1
32 #define SUBMODE_MIXED 2
33 
34 int32_t CBC_PDF417HighLevelEncoder::TEXT_COMPACTION = 0;
35 int32_t CBC_PDF417HighLevelEncoder::BYTE_COMPACTION = 1;
36 int32_t CBC_PDF417HighLevelEncoder::NUMERIC_COMPACTION = 2;
37 int32_t CBC_PDF417HighLevelEncoder::SUBMODE_PUNCTUATION = 3;
38 int32_t CBC_PDF417HighLevelEncoder::LATCH_TO_TEXT = 900;
39 int32_t CBC_PDF417HighLevelEncoder::LATCH_TO_BYTE_PADDED = 901;
40 int32_t CBC_PDF417HighLevelEncoder::LATCH_TO_NUMERIC = 902;
41 int32_t CBC_PDF417HighLevelEncoder::SHIFT_TO_BYTE = 913;
42 int32_t CBC_PDF417HighLevelEncoder::LATCH_TO_BYTE = 924;
43 uint8_t CBC_PDF417HighLevelEncoder::TEXT_MIXED_RAW[] = {
44     48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 38, 13, 9, 44, 58,
45     35, 45, 46, 36, 47, 43, 37, 42, 61, 94, 0,  32, 0, 0,  0};
46 uint8_t CBC_PDF417HighLevelEncoder::TEXT_PUNCTUATION_RAW[] = {
47     59, 60, 62, 64, 91, 92, 93,  95, 96, 126, 33, 13,  9,   44, 58,
48     10, 45, 46, 36, 47, 34, 124, 42, 40, 41,  63, 123, 125, 39, 0};
49 int32_t CBC_PDF417HighLevelEncoder::MIXED[128] = {0};
50 int32_t CBC_PDF417HighLevelEncoder::PUNCTUATION[128] = {0};
51 
Initialize()52 void CBC_PDF417HighLevelEncoder::Initialize() {
53   Inverse();
54 }
55 
Finalize()56 void CBC_PDF417HighLevelEncoder::Finalize() {}
57 
encodeHighLevel(WideString wideMsg,Compaction compaction,int32_t & e)58 WideString CBC_PDF417HighLevelEncoder::encodeHighLevel(WideString wideMsg,
59                                                        Compaction compaction,
60                                                        int32_t& e) {
61   ByteString bytes;
62   CBC_UtilCodingConvert::UnicodeToUTF8(wideMsg, bytes);
63   WideString msg;
64   int32_t len = bytes.GetLength();
65   for (int32_t i = 0; i < len; i++) {
66     wchar_t ch = (wchar_t)(bytes[i] & 0xff);
67     if (ch == '?' && bytes[i] != '?') {
68       e = BCExceptionCharactersOutsideISO88591Encoding;
69       return WideString();
70     }
71     msg += ch;
72   }
73   std::vector<uint8_t> byteArr(bytes.begin(), bytes.end());
74   WideString sb;
75   len = msg.GetLength();
76   int32_t p = 0;
77   int32_t textSubMode = SUBMODE_ALPHA;
78   if (compaction == TEXT) {
79     encodeText(msg, p, len, sb, textSubMode);
80   } else if (compaction == BYTES) {
81     encodeBinary(&byteArr, p, byteArr.size(), BYTE_COMPACTION, sb);
82   } else if (compaction == NUMERIC) {
83     sb += (wchar_t)LATCH_TO_NUMERIC;
84     encodeNumeric(msg, p, len, sb);
85   } else {
86     int32_t encodingMode = LATCH_TO_TEXT;
87     while (p < len) {
88       int32_t n = determineConsecutiveDigitCount(msg, p);
89       if (n >= 13) {
90         sb += (wchar_t)LATCH_TO_NUMERIC;
91         encodingMode = NUMERIC_COMPACTION;
92         textSubMode = SUBMODE_ALPHA;
93         encodeNumeric(msg, p, n, sb);
94         p += n;
95       } else {
96         int32_t t = determineConsecutiveTextCount(msg, p);
97         if (t >= 5 || n == len) {
98           if (encodingMode != TEXT_COMPACTION) {
99             sb += (wchar_t)LATCH_TO_TEXT;
100             encodingMode = TEXT_COMPACTION;
101             textSubMode = SUBMODE_ALPHA;
102           }
103           textSubMode = encodeText(msg, p, t, sb, textSubMode);
104           p += t;
105         } else {
106           int32_t b = determineConsecutiveBinaryCount(msg, &byteArr, p, e);
107           if (e != BCExceptionNO)
108             return L" ";
109           if (b == 0) {
110             b = 1;
111           }
112           if (b == 1 && encodingMode == TEXT_COMPACTION) {
113             encodeBinary(&byteArr, p, 1, TEXT_COMPACTION, sb);
114           } else {
115             encodeBinary(&byteArr, p, b, encodingMode, sb);
116             encodingMode = BYTE_COMPACTION;
117             textSubMode = SUBMODE_ALPHA;
118           }
119           p += b;
120         }
121       }
122     }
123   }
124   return sb;
125 }
126 
Inverse()127 void CBC_PDF417HighLevelEncoder::Inverse() {
128   for (size_t l = 0; l < FX_ArraySize(MIXED); ++l)
129     MIXED[l] = -1;
130 
131   for (uint8_t i = 0; i < FX_ArraySize(TEXT_MIXED_RAW); ++i) {
132     uint8_t b = TEXT_MIXED_RAW[i];
133     if (b != 0)
134       MIXED[b] = i;
135   }
136 
137   for (size_t l = 0; l < FX_ArraySize(PUNCTUATION); ++l)
138     PUNCTUATION[l] = -1;
139 
140   for (uint8_t i = 0; i < FX_ArraySize(TEXT_PUNCTUATION_RAW); ++i) {
141     uint8_t b = TEXT_PUNCTUATION_RAW[i];
142     if (b != 0)
143       PUNCTUATION[b] = i;
144   }
145 }
146 
encodeText(WideString msg,int32_t startpos,int32_t count,WideString & sb,int32_t initialSubmode)147 int32_t CBC_PDF417HighLevelEncoder::encodeText(WideString msg,
148                                                int32_t startpos,
149                                                int32_t count,
150                                                WideString& sb,
151                                                int32_t initialSubmode) {
152   WideString tmp;
153   int32_t submode = initialSubmode;
154   int32_t idx = 0;
155   while (true) {
156     wchar_t ch = msg[startpos + idx];
157     switch (submode) {
158       case SUBMODE_ALPHA:
159         if (isAlphaUpper(ch)) {
160           if (ch == ' ')
161             tmp += (wchar_t)26;
162           else
163             tmp += (wchar_t)(ch - 65);
164           break;
165         }
166         if (isAlphaLower(ch)) {
167           submode = SUBMODE_LOWER;
168           tmp += (wchar_t)27;
169           continue;
170         }
171         if (isMixed(ch)) {
172           submode = SUBMODE_MIXED;
173           tmp += (wchar_t)28;
174           continue;
175         }
176         tmp += (wchar_t)29;
177         tmp += PUNCTUATION[ch];
178         break;
179       case SUBMODE_LOWER:
180         if (isAlphaLower(ch)) {
181           if (ch == ' ') {
182             tmp += (wchar_t)26;
183           } else {
184             tmp += (wchar_t)(ch - 97);
185           }
186           break;
187         }
188         if (isAlphaUpper(ch)) {
189           tmp += (wchar_t)27;
190           tmp += (wchar_t)(ch - 65);
191           break;
192         }
193         if (isMixed(ch)) {
194           submode = SUBMODE_MIXED;
195           tmp += (wchar_t)28;
196           continue;
197         }
198 
199         tmp += (wchar_t)29;
200         tmp += PUNCTUATION[ch];
201         break;
202       case SUBMODE_MIXED:
203         if (isMixed(ch)) {
204           tmp += MIXED[ch];
205           break;
206         }
207         if (isAlphaUpper(ch)) {
208           submode = SUBMODE_ALPHA;
209           tmp += (wchar_t)28;
210           continue;
211         }
212         if (isAlphaLower(ch)) {
213           submode = SUBMODE_LOWER;
214           tmp += (wchar_t)27;
215           continue;
216         }
217         if (startpos + idx + 1 < count) {
218           wchar_t next = msg[startpos + idx + 1];
219           if (isPunctuation(next)) {
220             submode = SUBMODE_PUNCTUATION;
221             tmp += (wchar_t)25;
222             continue;
223           }
224         }
225         tmp += (wchar_t)29;
226         tmp += PUNCTUATION[ch];
227         break;
228       default:
229         if (isPunctuation(ch)) {
230           tmp += PUNCTUATION[ch];
231           break;
232         }
233         submode = SUBMODE_ALPHA;
234         tmp += (wchar_t)29;
235         continue;
236     }
237     idx++;
238     if (idx >= count) {
239       break;
240     }
241   }
242   wchar_t h = 0;
243   int32_t len = tmp.GetLength();
244   for (int32_t i = 0; i < len; i++) {
245     bool odd = (i % 2) != 0;
246     if (odd) {
247       h = (wchar_t)((h * 30) + tmp[i]);
248       sb += h;
249     } else {
250       h = tmp[i];
251     }
252   }
253   if ((len % 2) != 0) {
254     sb += (wchar_t)((h * 30) + 29);
255   }
256   return submode;
257 }
encodeBinary(std::vector<uint8_t> * bytes,int32_t startpos,int32_t count,int32_t startmode,WideString & sb)258 void CBC_PDF417HighLevelEncoder::encodeBinary(std::vector<uint8_t>* bytes,
259                                               int32_t startpos,
260                                               int32_t count,
261                                               int32_t startmode,
262                                               WideString& sb) {
263   if (count == 1 && startmode == TEXT_COMPACTION) {
264     sb += (wchar_t)SHIFT_TO_BYTE;
265   }
266   int32_t idx = startpos;
267   int32_t i = 0;
268   if (count >= 6) {
269     sb += (wchar_t)LATCH_TO_BYTE;
270     wchar_t chars[5];
271     while ((startpos + count - idx) >= 6) {
272       int64_t t = 0;
273       for (i = 0; i < 6; i++) {
274         t <<= 8;
275         t += (*bytes)[idx + i] & 0xff;
276       }
277       for (i = 0; i < 5; i++) {
278         chars[i] = (wchar_t)(t % 900);
279         t /= 900;
280       }
281       for (i = 4; i >= 0; i--) {
282         sb += (chars[i]);
283       }
284       idx += 6;
285     }
286   }
287   if (idx < startpos + count) {
288     sb += (wchar_t)LATCH_TO_BYTE_PADDED;
289   }
290   for (i = idx; i < startpos + count; i++) {
291     int32_t ch = (*bytes)[i] & 0xff;
292     sb += (wchar_t)ch;
293   }
294 }
encodeNumeric(WideString msg,int32_t startpos,int32_t count,WideString & sb)295 void CBC_PDF417HighLevelEncoder::encodeNumeric(WideString msg,
296                                                int32_t startpos,
297                                                int32_t count,
298                                                WideString& sb) {
299   int32_t idx = 0;
300   BigInteger num900 = 900;
301   while (idx < count) {
302     WideString tmp;
303     int32_t len = 44 < count - idx ? 44 : count - idx;
304     ByteString part =
305         ((wchar_t)'1' + msg.Mid(startpos + idx, len)).UTF8Encode();
306     BigInteger bigint = stringToBigInteger(part.c_str());
307     do {
308       int32_t c = (bigint % num900).toInt();
309       tmp += (wchar_t)(c);
310       bigint = bigint / num900;
311     } while (!bigint.isZero());
312     for (int32_t i = tmp.GetLength() - 1; i >= 0; i--) {
313       sb += tmp[i];
314     }
315     idx += len;
316   }
317 }
isDigit(wchar_t ch)318 bool CBC_PDF417HighLevelEncoder::isDigit(wchar_t ch) {
319   return ch >= '0' && ch <= '9';
320 }
isAlphaUpper(wchar_t ch)321 bool CBC_PDF417HighLevelEncoder::isAlphaUpper(wchar_t ch) {
322   return ch == ' ' || (ch >= 'A' && ch <= 'Z');
323 }
isAlphaLower(wchar_t ch)324 bool CBC_PDF417HighLevelEncoder::isAlphaLower(wchar_t ch) {
325   return ch == ' ' || (ch >= 'a' && ch <= 'z');
326 }
isMixed(wchar_t ch)327 bool CBC_PDF417HighLevelEncoder::isMixed(wchar_t ch) {
328   return MIXED[ch] != -1;
329 }
isPunctuation(wchar_t ch)330 bool CBC_PDF417HighLevelEncoder::isPunctuation(wchar_t ch) {
331   return PUNCTUATION[ch] != -1;
332 }
isText(wchar_t ch)333 bool CBC_PDF417HighLevelEncoder::isText(wchar_t ch) {
334   return ch == '\t' || ch == '\n' || ch == '\r' || (ch >= 32 && ch <= 126);
335 }
determineConsecutiveDigitCount(WideString msg,int32_t startpos)336 int32_t CBC_PDF417HighLevelEncoder::determineConsecutiveDigitCount(
337     WideString msg,
338     int32_t startpos) {
339   int32_t count = 0;
340   int32_t len = msg.GetLength();
341   int32_t idx = startpos;
342   if (idx < len) {
343     wchar_t ch = msg[idx];
344     while (isDigit(ch) && idx < len) {
345       count++;
346       idx++;
347       if (idx < len) {
348         ch = msg[idx];
349       }
350     }
351   }
352   return count;
353 }
determineConsecutiveTextCount(WideString msg,int32_t startpos)354 int32_t CBC_PDF417HighLevelEncoder::determineConsecutiveTextCount(
355     WideString msg,
356     int32_t startpos) {
357   int32_t len = msg.GetLength();
358   int32_t idx = startpos;
359   while (idx < len) {
360     wchar_t ch = msg[idx];
361     int32_t numericCount = 0;
362     while (numericCount < 13 && isDigit(ch) && idx < len) {
363       numericCount++;
364       idx++;
365       if (idx < len) {
366         ch = msg[idx];
367       }
368     }
369     if (numericCount >= 13) {
370       return idx - startpos - numericCount;
371     }
372     if (numericCount > 0) {
373       continue;
374     }
375     ch = msg[idx];
376     if (!isText(ch)) {
377       break;
378     }
379     idx++;
380   }
381   return idx - startpos;
382 }
determineConsecutiveBinaryCount(WideString msg,std::vector<uint8_t> * bytes,int32_t startpos,int32_t & e)383 int32_t CBC_PDF417HighLevelEncoder::determineConsecutiveBinaryCount(
384     WideString msg,
385     std::vector<uint8_t>* bytes,
386     int32_t startpos,
387     int32_t& e) {
388   int32_t len = msg.GetLength();
389   int32_t idx = startpos;
390   while (idx < len) {
391     wchar_t ch = msg[idx];
392     int32_t numericCount = 0;
393     while (numericCount < 13 && isDigit(ch)) {
394       numericCount++;
395       int32_t i = idx + numericCount;
396       if (i >= len) {
397         break;
398       }
399       ch = msg[i];
400     }
401     if (numericCount >= 13) {
402       return idx - startpos;
403     }
404     int32_t textCount = 0;
405     while (textCount < 5 && isText(ch)) {
406       textCount++;
407       int32_t i = idx + textCount;
408       if (i >= len) {
409         break;
410       }
411       ch = msg[i];
412     }
413     if (textCount >= 5) {
414       return idx - startpos;
415     }
416     ch = msg[idx];
417     if ((*bytes)[idx] == 63 && ch != '?') {
418       e = BCExceptionNonEncodableCharacterDetected;
419       return -1;
420     }
421     idx++;
422   }
423   return idx - startpos;
424 }
425