/*
 * Copyright (c) 2024 Huawei Device Co., Ltd.
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#ifndef HYPHENATE_PATTERN_H
#define HYPHENATE_PATTERN_H

#include <cinttypes>
#include <cstddef> // size_t is used by the constants below; do not rely on a transitive include
#include <cstdint>
#include <string>
#include <vector>

namespace OHOS::Hyphenate {
// Status codes. NOTE: these are preprocessor macros, so they are visible outside
// the namespace as well; they are kept as macros so existing users keep compiling.
#define SUCCEED (0)
#define FAILED (-1)

// Layout and formatting constants shared by the hyphenation pattern code.
// NOTE(review): the exact role of each value is defined by the implementation
// files, which are not part of this header - confirm there before changing any.
constexpr size_t HYPHEN_DEFAULT_INDENT = 10;
constexpr size_t HYPHEN_INDENT_INCREMENT = 2;
constexpr size_t HYPHEN_BASE_CODE_SHIFT = 2;
constexpr size_t ROOT_INDENT = 12;
constexpr size_t LARGE_PATH_SIZE = 8;
constexpr size_t BYTES_PRE_WORD = 4; // presumably "bytes per word"; name kept for compatibility
constexpr size_t SHIFT_BITS_14 = 14;
constexpr size_t SHIFT_BITS_16 = 16;
constexpr size_t SHIFT_BITS_30 = 30;
constexpr size_t PADDING_SIZE = 4;
// Flag values, stored as the character codes of '9' and '8'.
constexpr int16_t BREAK_FLAG = '9';
constexpr int16_t NO_BREAK_FLAG = '8';

// Each entry of the offset arrays is two bytes wide. We assume that 14 bits
// are enough to represent an offset, so the first two bits of an entry are
// used to carry the path type listed below.
enum class PathType : uint8_t {
    PATTERN = 0,
    LINEAR = 1,
    PAIRS = 2,
    DIRECT = 3
};

// Converts a UTF-8 encoded string into a sequence of 16-bit code units.
// Declared here only; see the implementation file for how invalid input is handled.
std::vector<uint16_t> ConvertToUtf16(const std::string& utf8Str);

class HyphenProcessor {
public:
    // Processes the file at filePath and uses outFilePath as the output location.
    // NOTE(review): the input and output formats are defined by the implementation,
    // which is not visible in this header. The spelling "Proccess" is part of the
    // public interface and is intentionally left unchanged.
    void Proccess(const std::string& filePath, const std::string& outFilePath) const;
};

class HyphenReader {
public:
    // Reads the file at filePath for the given UTF-16 target sequence.
    // Returns an int32_t status; presumably SUCCEED or FAILED - confirm against
    // the implementation.
    int32_t Read(const char* filePath, const std::vector<uint16_t>& utf16Target) const;
};

} // namespace OHOS::Hyphenate
#endif // HYPHENATE_PATTERN_H