• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright 2014 PDFium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
6 // Original code is licensed as follows:
7 /*
8  * Copyright 2006-2007 Jeremias Maerki.
9  *
10  * Licensed under the Apache License, Version 2.0 (the "License");
11  * you may not use this file except in compliance with the License.
12  * You may obtain a copy of the License at
13  *
14  *      http://www.apache.org/licenses/LICENSE-2.0
15  *
16  * Unless required by applicable law or agreed to in writing, software
17  * distributed under the License is distributed on an "AS IS" BASIS,
18  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
19  * See the License for the specific language governing permissions and
20  * limitations under the License.
21  */
22 
23 #include "fxbarcode/datamatrix/BC_HighLevelEncoder.h"
24 
25 #include <limits>
26 #include <memory>
27 #include <vector>
28 
29 #include "fxbarcode/BC_UtilCodingConvert.h"
30 #include "fxbarcode/common/BC_CommonBitMatrix.h"
31 #include "fxbarcode/datamatrix/BC_ASCIIEncoder.h"
32 #include "fxbarcode/datamatrix/BC_Base256Encoder.h"
33 #include "fxbarcode/datamatrix/BC_C40Encoder.h"
34 #include "fxbarcode/datamatrix/BC_EdifactEncoder.h"
35 #include "fxbarcode/datamatrix/BC_Encoder.h"
36 #include "fxbarcode/datamatrix/BC_EncoderContext.h"
37 #include "fxbarcode/datamatrix/BC_SymbolInfo.h"
38 #include "fxbarcode/datamatrix/BC_TextEncoder.h"
39 #include "fxbarcode/datamatrix/BC_X12Encoder.h"
40 #include "fxbarcode/utils.h"
41 #include "third_party/base/ptr_util.h"
42 
43 const wchar_t CBC_HighLevelEncoder::LATCH_TO_C40 = 230;
44 const wchar_t CBC_HighLevelEncoder::LATCH_TO_BASE256 = 231;
45 const wchar_t CBC_HighLevelEncoder::UPPER_SHIFT = 235;
46 const wchar_t CBC_HighLevelEncoder::LATCH_TO_ANSIX12 = 238;
47 const wchar_t CBC_HighLevelEncoder::LATCH_TO_TEXT = 239;
48 const wchar_t CBC_HighLevelEncoder::LATCH_TO_EDIFACT = 240;
49 const wchar_t CBC_HighLevelEncoder::C40_UNLATCH = 254;
50 const wchar_t CBC_HighLevelEncoder::X12_UNLATCH = 254;
51 const wchar_t CBC_HighLevelEncoder::PAD = 129;
52 const wchar_t CBC_HighLevelEncoder::MACRO_05 = 236;
53 const wchar_t CBC_HighLevelEncoder::MACRO_06 = 237;
54 const wchar_t CBC_HighLevelEncoder::MACRO_05_HEADER[] = L"[)>05";
55 const wchar_t CBC_HighLevelEncoder::MACRO_06_HEADER[] = L"[)>06";
56 const wchar_t CBC_HighLevelEncoder::MACRO_TRAILER = 0x0004;
57 
CBC_HighLevelEncoder()58 CBC_HighLevelEncoder::CBC_HighLevelEncoder() {}
~CBC_HighLevelEncoder()59 CBC_HighLevelEncoder::~CBC_HighLevelEncoder() {}
60 
getBytesForMessage(WideString msg)61 std::vector<uint8_t>& CBC_HighLevelEncoder::getBytesForMessage(WideString msg) {
62   ByteString bytestr;
63   CBC_UtilCodingConvert::UnicodeToUTF8(msg, bytestr);
64   m_bytearray.insert(m_bytearray.end(), bytestr.begin(), bytestr.end());
65   return m_bytearray;
66 }
67 
68 // static
encodeHighLevel(WideString msg,WideString ecLevel,bool allowRectangular,int32_t & e)69 WideString CBC_HighLevelEncoder::encodeHighLevel(WideString msg,
70                                                  WideString ecLevel,
71                                                  bool allowRectangular,
72                                                  int32_t& e) {
73   CBC_EncoderContext context(msg, ecLevel, e);
74   if (e != BCExceptionNO)
75     return WideString();
76 
77   context.setAllowRectangular(allowRectangular);
78   if ((msg.Left(6) == MACRO_05_HEADER) && (msg.Last() == MACRO_TRAILER)) {
79     context.writeCodeword(MACRO_05);
80     context.setSkipAtEnd(2);
81     context.m_pos += 6;
82   } else if ((msg.Left(6) == MACRO_06_HEADER) &&
83              (msg.Last() == MACRO_TRAILER)) {
84     context.writeCodeword(MACRO_06);
85     context.setSkipAtEnd(2);
86     context.m_pos += 6;
87   }
88 
89   std::vector<std::unique_ptr<CBC_Encoder>> encoders;
90   encoders.push_back(pdfium::MakeUnique<CBC_ASCIIEncoder>());
91   encoders.push_back(pdfium::MakeUnique<CBC_C40Encoder>());
92   encoders.push_back(pdfium::MakeUnique<CBC_TextEncoder>());
93   encoders.push_back(pdfium::MakeUnique<CBC_X12Encoder>());
94   encoders.push_back(pdfium::MakeUnique<CBC_EdifactEncoder>());
95   encoders.push_back(pdfium::MakeUnique<CBC_Base256Encoder>());
96   int32_t encodingMode = ASCII_ENCODATION;
97   while (context.hasMoreCharacters()) {
98     encoders[encodingMode]->Encode(context, e);
99     if (e != BCExceptionNO)
100       return L"";
101 
102     if (context.m_newEncoding >= 0) {
103       encodingMode = context.m_newEncoding;
104       context.resetEncoderSignal();
105     }
106   }
107   int32_t len = context.m_codewords.GetLength();
108   context.updateSymbolInfo(e);
109   if (e != BCExceptionNO)
110     return L"";
111 
112   int32_t capacity = context.m_symbolInfo->dataCapacity();
113   if (len < capacity) {
114     if (encodingMode != ASCII_ENCODATION &&
115         encodingMode != BASE256_ENCODATION) {
116       context.writeCodeword(0x00fe);
117     }
118   }
119   WideString codewords = context.m_codewords;
120   if (pdfium::base::checked_cast<int32_t>(codewords.GetLength()) < capacity) {
121     codewords += PAD;
122   }
123   while (pdfium::base::checked_cast<int32_t>(codewords.GetLength()) <
124          capacity) {
125     codewords += (randomize253State(
126         PAD, pdfium::base::checked_cast<int32_t>(codewords.GetLength()) + 1));
127   }
128   return codewords;
129 }
lookAheadTest(WideString msg,int32_t startpos,int32_t currentMode)130 int32_t CBC_HighLevelEncoder::lookAheadTest(WideString msg,
131                                             int32_t startpos,
132                                             int32_t currentMode) {
133   if (startpos >= pdfium::base::checked_cast<int32_t>(msg.GetLength())) {
134     return currentMode;
135   }
136   std::vector<float> charCounts;
137   if (currentMode == ASCII_ENCODATION) {
138     charCounts.push_back(0);
139     charCounts.push_back(1);
140     charCounts.push_back(1);
141     charCounts.push_back(1);
142     charCounts.push_back(1);
143     charCounts.push_back(1.25f);
144   } else {
145     charCounts.push_back(1);
146     charCounts.push_back(2);
147     charCounts.push_back(2);
148     charCounts.push_back(2);
149     charCounts.push_back(2);
150     charCounts.push_back(2.25f);
151     charCounts[currentMode] = 0;
152   }
153   int32_t charsProcessed = 0;
154   while (true) {
155     if ((startpos + charsProcessed) ==
156         pdfium::base::checked_cast<int32_t>(msg.GetLength())) {
157       int32_t min = std::numeric_limits<int32_t>::max();
158       std::vector<uint8_t> mins(6);
159       std::vector<int32_t> intCharCounts(6);
160       min = findMinimums(charCounts, intCharCounts, min, mins);
161       int32_t minCount = getMinimumCount(mins);
162       if (intCharCounts[ASCII_ENCODATION] == min) {
163         return ASCII_ENCODATION;
164       }
165       if (minCount == 1 && mins[BASE256_ENCODATION] > 0) {
166         return BASE256_ENCODATION;
167       }
168       if (minCount == 1 && mins[EDIFACT_ENCODATION] > 0) {
169         return EDIFACT_ENCODATION;
170       }
171       if (minCount == 1 && mins[TEXT_ENCODATION] > 0) {
172         return TEXT_ENCODATION;
173       }
174       if (minCount == 1 && mins[X12_ENCODATION] > 0) {
175         return X12_ENCODATION;
176       }
177       return C40_ENCODATION;
178     }
179     wchar_t c = msg[startpos + charsProcessed];
180     charsProcessed++;
181     if (isDigit(c)) {
182       charCounts[ASCII_ENCODATION] += 0.5;
183     } else if (isExtendedASCII(c)) {
184       charCounts[ASCII_ENCODATION] = (float)ceil(charCounts[ASCII_ENCODATION]);
185       charCounts[ASCII_ENCODATION] += 2;
186     } else {
187       charCounts[ASCII_ENCODATION] = (float)ceil(charCounts[ASCII_ENCODATION]);
188       charCounts[ASCII_ENCODATION]++;
189     }
190     if (isNativeC40(c)) {
191       charCounts[C40_ENCODATION] += 2.0f / 3.0f;
192     } else if (isExtendedASCII(c)) {
193       charCounts[C40_ENCODATION] += 8.0f / 3.0f;
194     } else {
195       charCounts[C40_ENCODATION] += 4.0f / 3.0f;
196     }
197     if (isNativeText(c)) {
198       charCounts[TEXT_ENCODATION] += 2.0f / 3.0f;
199     } else if (isExtendedASCII(c)) {
200       charCounts[TEXT_ENCODATION] += 8.0f / 3.0f;
201     } else {
202       charCounts[TEXT_ENCODATION] += 4.0f / 3.0f;
203     }
204     if (isNativeX12(c)) {
205       charCounts[X12_ENCODATION] += 2.0f / 3.0f;
206     } else if (isExtendedASCII(c)) {
207       charCounts[X12_ENCODATION] += 13.0f / 3.0f;
208     } else {
209       charCounts[X12_ENCODATION] += 10.0f / 3.0f;
210     }
211     if (isNativeEDIFACT(c)) {
212       charCounts[EDIFACT_ENCODATION] += 3.0f / 4.0f;
213     } else if (isExtendedASCII(c)) {
214       charCounts[EDIFACT_ENCODATION] += 17.0f / 4.0f;
215     } else {
216       charCounts[EDIFACT_ENCODATION] += 13.0f / 4.0f;
217     }
218     charCounts[BASE256_ENCODATION]++;
219     if (charsProcessed >= 4) {
220       std::vector<int32_t> intCharCounts(6);
221       std::vector<uint8_t> mins(6);
222       findMinimums(charCounts, intCharCounts,
223                    std::numeric_limits<int32_t>::max(), mins);
224       int32_t minCount = getMinimumCount(mins);
225       if (intCharCounts[ASCII_ENCODATION] < intCharCounts[BASE256_ENCODATION] &&
226           intCharCounts[ASCII_ENCODATION] < intCharCounts[C40_ENCODATION] &&
227           intCharCounts[ASCII_ENCODATION] < intCharCounts[TEXT_ENCODATION] &&
228           intCharCounts[ASCII_ENCODATION] < intCharCounts[X12_ENCODATION] &&
229           intCharCounts[ASCII_ENCODATION] < intCharCounts[EDIFACT_ENCODATION]) {
230         return ASCII_ENCODATION;
231       }
232       if (intCharCounts[BASE256_ENCODATION] < intCharCounts[ASCII_ENCODATION] ||
233           (mins[C40_ENCODATION] + mins[TEXT_ENCODATION] + mins[X12_ENCODATION] +
234            mins[EDIFACT_ENCODATION]) == 0) {
235         return BASE256_ENCODATION;
236       }
237       if (minCount == 1 && mins[EDIFACT_ENCODATION] > 0) {
238         return EDIFACT_ENCODATION;
239       }
240       if (minCount == 1 && mins[TEXT_ENCODATION] > 0) {
241         return TEXT_ENCODATION;
242       }
243       if (minCount == 1 && mins[X12_ENCODATION] > 0) {
244         return X12_ENCODATION;
245       }
246       if (intCharCounts[C40_ENCODATION] + 1 < intCharCounts[ASCII_ENCODATION] &&
247           intCharCounts[C40_ENCODATION] + 1 <
248               intCharCounts[BASE256_ENCODATION] &&
249           intCharCounts[C40_ENCODATION] + 1 <
250               intCharCounts[EDIFACT_ENCODATION] &&
251           intCharCounts[C40_ENCODATION] + 1 < intCharCounts[TEXT_ENCODATION]) {
252         if (intCharCounts[C40_ENCODATION] < intCharCounts[X12_ENCODATION]) {
253           return C40_ENCODATION;
254         }
255         if (intCharCounts[C40_ENCODATION] == intCharCounts[X12_ENCODATION]) {
256           int32_t p = startpos + charsProcessed + 1;
257           int32_t checked_length =
258               pdfium::base::checked_cast<int32_t>(msg.GetLength());
259           while (p < checked_length) {
260             wchar_t tc = msg[p];
261             if (isX12TermSep(tc)) {
262               return X12_ENCODATION;
263             }
264             if (!isNativeX12(tc)) {
265               break;
266             }
267             p++;
268           }
269           return C40_ENCODATION;
270         }
271       }
272     }
273   }
274 }
isDigit(wchar_t ch)275 bool CBC_HighLevelEncoder::isDigit(wchar_t ch) {
276   return ch >= '0' && ch <= '9';
277 }
isExtendedASCII(wchar_t ch)278 bool CBC_HighLevelEncoder::isExtendedASCII(wchar_t ch) {
279   return ch >= 128 && ch <= 255;
280 }
determineConsecutiveDigitCount(WideString msg,int32_t startpos)281 int32_t CBC_HighLevelEncoder::determineConsecutiveDigitCount(WideString msg,
282                                                              int32_t startpos) {
283   int32_t count = 0;
284   int32_t len = msg.GetLength();
285   int32_t idx = startpos;
286   if (idx < len) {
287     wchar_t ch = msg[idx];
288     while (isDigit(ch) && idx < len) {
289       count++;
290       idx++;
291       if (idx < len) {
292         ch = msg[idx];
293       }
294     }
295   }
296   return count;
297 }
298 
randomize253State(wchar_t ch,int32_t codewordPosition)299 wchar_t CBC_HighLevelEncoder::randomize253State(wchar_t ch,
300                                                 int32_t codewordPosition) {
301   int32_t pseudoRandom = ((149 * codewordPosition) % 253) + 1;
302   int32_t tempVariable = ch + pseudoRandom;
303   return tempVariable <= 254 ? (wchar_t)tempVariable
304                              : (wchar_t)(tempVariable - 254);
305 }
findMinimums(std::vector<float> & charCounts,std::vector<int32_t> & intCharCounts,int32_t min,std::vector<uint8_t> & mins)306 int32_t CBC_HighLevelEncoder::findMinimums(std::vector<float>& charCounts,
307                                            std::vector<int32_t>& intCharCounts,
308                                            int32_t min,
309                                            std::vector<uint8_t>& mins) {
310   for (size_t l = 0; l < mins.size(); l++)
311     mins[l] = 0;
312 
313   for (size_t i = 0; i < 6; i++) {
314     intCharCounts[i] = static_cast<int32_t>(ceil(charCounts[i]));
315     int32_t current = intCharCounts[i];
316     if (min > current) {
317       min = current;
318       for (size_t j = 0; j < mins.size(); j++)
319         mins[j] = 0;
320     }
321     if (min == current)
322       mins[i]++;
323   }
324   return min;
325 }
getMinimumCount(std::vector<uint8_t> & mins)326 int32_t CBC_HighLevelEncoder::getMinimumCount(std::vector<uint8_t>& mins) {
327   int32_t minCount = 0;
328   for (int32_t i = 0; i < 6; i++) {
329     minCount += mins[i];
330   }
331   return minCount;
332 }
isNativeC40(wchar_t ch)333 bool CBC_HighLevelEncoder::isNativeC40(wchar_t ch) {
334   return (ch == ' ') || (ch >= '0' && ch <= '9') || (ch >= 'A' && ch <= 'Z');
335 }
isNativeText(wchar_t ch)336 bool CBC_HighLevelEncoder::isNativeText(wchar_t ch) {
337   return (ch == ' ') || (ch >= '0' && ch <= '9') || (ch >= 'a' && ch <= 'z');
338 }
isNativeX12(wchar_t ch)339 bool CBC_HighLevelEncoder::isNativeX12(wchar_t ch) {
340   return isX12TermSep(ch) || (ch == ' ') || (ch >= '0' && ch <= '9') ||
341          (ch >= 'A' && ch <= 'Z');
342 }
isX12TermSep(wchar_t ch)343 bool CBC_HighLevelEncoder::isX12TermSep(wchar_t ch) {
344   return (ch == '\r') || (ch == '*') || (ch == '>');
345 }
isNativeEDIFACT(wchar_t ch)346 bool CBC_HighLevelEncoder::isNativeEDIFACT(wchar_t ch) {
347   return ch >= ' ' && ch <= '^';
348 }
349