// Copyright 2014 PDFium Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com // Original code is licensed as follows: /* * Copyright 2006-2007 Jeremias Maerki. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #include "fxbarcode/datamatrix/BC_HighLevelEncoder.h" #include #include #include #include #include #include "core/fxcrt/fx_extension.h" #include "fxbarcode/common/BC_CommonBitMatrix.h" #include "fxbarcode/datamatrix/BC_ASCIIEncoder.h" #include "fxbarcode/datamatrix/BC_Base256Encoder.h" #include "fxbarcode/datamatrix/BC_C40Encoder.h" #include "fxbarcode/datamatrix/BC_EdifactEncoder.h" #include "fxbarcode/datamatrix/BC_Encoder.h" #include "fxbarcode/datamatrix/BC_EncoderContext.h" #include "fxbarcode/datamatrix/BC_SymbolInfo.h" #include "fxbarcode/datamatrix/BC_TextEncoder.h" #include "fxbarcode/datamatrix/BC_X12Encoder.h" #include "third_party/base/ptr_util.h" namespace { const wchar_t kPad = 129; const wchar_t kMacro05 = 236; const wchar_t kMacro06 = 237; const wchar_t kMacro05Header[] = L"[)>\036" L"05"; const wchar_t kMacro06Header[] = L"[)>\036" L"06"; const wchar_t kMacroTrailer = 0x0004; constexpr size_t kEncoderCount = static_cast(CBC_HighLevelEncoder::Encoding::LAST) + 1; static_assert(kEncoderCount == 6, "Bad encoder count"); wchar_t Randomize253State(wchar_t ch, int32_t codewordPosition) { int32_t pseudoRandom = ((149 * codewordPosition) % 253) + 1; int32_t tempVariable = ch + pseudoRandom; return tempVariable <= 254 ? static_cast(tempVariable) : static_cast(tempVariable - 254); } int32_t FindMinimums(const std::array& charCounts, std::array* intCharCounts, std::array* mins) { int32_t min = std::numeric_limits::max(); for (size_t i = 0; i < kEncoderCount; ++i) { int32_t current = static_cast(ceil(charCounts[i])); (*intCharCounts)[i] = current; if (min > current) { min = current; for (auto& m : *mins) m = 0; } if (min == current) (*mins)[i]++; } return min; } int32_t GetMinimumCount(const std::array& mins) { int32_t count = 0; for (const auto& m : mins) count += m; return count; } bool IsNativeC40(wchar_t ch) { return (ch == ' ') || (ch >= '0' && ch <= '9') || (ch >= 'A' && ch <= 'Z'); } bool IsNativeText(wchar_t ch) { return (ch == ' ') || (ch >= '0' && ch <= '9') || (ch >= 'a' && ch <= 'z'); } bool IsX12TermSep(wchar_t ch) { return (ch == '\r') || (ch == '*') || (ch == '>'); } bool IsNativeX12(wchar_t ch) { return IsX12TermSep(ch) || (ch == ' ') || (ch >= '0' && ch <= '9') || (ch >= 'A' && ch <= 'Z'); } bool IsNativeEDIFACT(wchar_t ch) { return ch >= ' ' && ch <= '^'; } size_t EncoderIndex(CBC_HighLevelEncoder::Encoding encoding) { ASSERT(encoding != CBC_HighLevelEncoder::Encoding::UNKNOWN); return static_cast(encoding); } } // namespace // static WideString CBC_HighLevelEncoder::EncodeHighLevel(const WideString& msg) { // Per spec. Alpha numeric input is even shorter. static constexpr size_t kMaxNumericInputLength = 3116; // Exit early if the input is too long. It will fail no matter what. if (msg.GetLength() > kMaxNumericInputLength) return WideString(); CBC_EncoderContext context(msg); if (context.HasCharactersOutsideISO88591Encoding()) return WideString(); if (msg.Back() == kMacroTrailer) { WideString left = msg.First(6); if (left == kMacro05Header) { context.writeCodeword(kMacro05); context.setSkipAtEnd(2); context.m_pos += 6; } else if (left == kMacro06Header) { context.writeCodeword(kMacro06); context.setSkipAtEnd(2); context.m_pos += 6; } } std::vector> encoders; encoders.push_back(pdfium::MakeUnique()); encoders.push_back(pdfium::MakeUnique()); encoders.push_back(pdfium::MakeUnique()); encoders.push_back(pdfium::MakeUnique()); encoders.push_back(pdfium::MakeUnique()); encoders.push_back(pdfium::MakeUnique()); Encoding encodingMode = Encoding::ASCII; while (context.hasMoreCharacters()) { if (!encoders[EncoderIndex(encodingMode)]->Encode(&context)) return WideString(); if (context.m_newEncoding != Encoding::UNKNOWN) { encodingMode = context.m_newEncoding; context.ResetEncoderSignal(); } } size_t len = context.m_codewords.GetLength(); if (!context.UpdateSymbolInfo()) return WideString(); size_t capacity = context.m_symbolInfo->dataCapacity(); if (len < capacity) { if (encodingMode != Encoding::ASCII && encodingMode != Encoding::BASE256) context.writeCodeword(0x00fe); } WideString codewords = context.m_codewords; if (codewords.GetLength() < capacity) codewords += kPad; while (codewords.GetLength() < capacity) codewords += Randomize253State(kPad, codewords.GetLength() + 1); ASSERT(!codewords.IsEmpty()); return codewords; } // static CBC_HighLevelEncoder::Encoding CBC_HighLevelEncoder::LookAheadTest( const WideString& msg, size_t startpos, CBC_HighLevelEncoder::Encoding currentMode) { if (startpos >= msg.GetLength()) return currentMode; std::array charCounts; if (currentMode == Encoding::ASCII) { charCounts = {0, 1, 1, 1, 1, 1.25f}; } else { charCounts = {1, 2, 2, 2, 2, 2.25f}; charCounts[EncoderIndex(currentMode)] = 0; } size_t charsProcessed = 0; while (true) { if ((startpos + charsProcessed) == msg.GetLength()) { std::array intCharCounts; std::array mins; int32_t min = FindMinimums(charCounts, &intCharCounts, &mins); if (intCharCounts[EncoderIndex(Encoding::ASCII)] == min) return Encoding::ASCII; const int32_t minCount = GetMinimumCount(mins); if (minCount == 1) { if (mins[EncoderIndex(Encoding::BASE256)] > 0) return Encoding::BASE256; if (mins[EncoderIndex(Encoding::EDIFACT)] > 0) return Encoding::EDIFACT; if (mins[EncoderIndex(Encoding::TEXT)] > 0) return Encoding::TEXT; if (mins[EncoderIndex(Encoding::X12)] > 0) return Encoding::X12; } return Encoding::C40; } wchar_t c = msg[startpos + charsProcessed]; charsProcessed++; { auto& count = charCounts[EncoderIndex(Encoding::ASCII)]; if (FXSYS_IsDecimalDigit(c)) count += 0.5; else if (IsExtendedASCII(c)) count = ceilf(count) + 2; else count = ceilf(count) + 1; } { auto& count = charCounts[EncoderIndex(Encoding::C40)]; if (IsNativeC40(c)) count += 2.0f / 3.0f; else if (IsExtendedASCII(c)) count += 8.0f / 3.0f; else count += 4.0f / 3.0f; } { auto& count = charCounts[EncoderIndex(Encoding::TEXT)]; if (IsNativeText(c)) count += 2.0f / 3.0f; else if (IsExtendedASCII(c)) count += 8.0f / 3.0f; else count += 4.0f / 3.0f; } { auto& count = charCounts[EncoderIndex(Encoding::X12)]; if (IsNativeX12(c)) count += 2.0f / 3.0f; else if (IsExtendedASCII(c)) count += 13.0f / 3.0f; else count += 10.0f / 3.0f; } { auto& count = charCounts[EncoderIndex(Encoding::EDIFACT)]; if (IsNativeEDIFACT(c)) count += 3.0f / 4.0f; else if (IsExtendedASCII(c)) count += 17.0f / 4.0f; else count += 13.0f / 4.0f; } charCounts[EncoderIndex(Encoding::BASE256)]++; if (charsProcessed < 4) continue; std::array intCharCounts; std::array mins; FindMinimums(charCounts, &intCharCounts, &mins); int32_t minCount = GetMinimumCount(mins); int32_t ascii_count = intCharCounts[EncoderIndex(Encoding::ASCII)]; int32_t c40_count = intCharCounts[EncoderIndex(Encoding::C40)]; int32_t text_count = intCharCounts[EncoderIndex(Encoding::TEXT)]; int32_t x12_count = intCharCounts[EncoderIndex(Encoding::X12)]; int32_t editfact_count = intCharCounts[EncoderIndex(Encoding::EDIFACT)]; int32_t base256_count = intCharCounts[EncoderIndex(Encoding::BASE256)]; int32_t bet_min = std::min({base256_count, editfact_count, text_count}); if (ascii_count < bet_min && ascii_count < c40_count && ascii_count < x12_count) { return Encoding::ASCII; } if (base256_count < ascii_count || (mins[EncoderIndex(Encoding::C40)] + mins[EncoderIndex(Encoding::TEXT)] + mins[EncoderIndex(Encoding::X12)] + mins[EncoderIndex(Encoding::EDIFACT)]) == 0) { return Encoding::BASE256; } if (minCount == 1) { if (mins[EncoderIndex(Encoding::EDIFACT)] > 0) return Encoding::EDIFACT; if (mins[EncoderIndex(Encoding::TEXT)] > 0) return Encoding::TEXT; if (mins[EncoderIndex(Encoding::X12)] > 0) return Encoding::X12; } if (c40_count + 1 < ascii_count && c40_count + 1 < bet_min) { if (c40_count < x12_count) return Encoding::C40; if (c40_count == x12_count) { size_t p = startpos + charsProcessed + 1; while (p < msg.GetLength()) { wchar_t tc = msg[p]; if (IsX12TermSep(tc)) return Encoding::X12; if (!IsNativeX12(tc)) break; p++; } return Encoding::C40; } } } } // static bool CBC_HighLevelEncoder::IsExtendedASCII(wchar_t ch) { return ch >= 128 && ch <= 255; }