/*
 * Copyright (C) 2015 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/**
 * An implementation of Liang's hyphenation algorithm.
 */

#ifndef MINIKIN_HYPHENATOR_H
#define MINIKIN_HYPHENATOR_H

// The declarations below use size_t, uint8_t/uint16_t and std::vector directly, so the
// headers that provide them are included here rather than relying on transitive includes.
#include <stddef.h>
#include <stdint.h>

#include <memory>
#include <unordered_map>
#include <vector>

namespace android {

// hyb file header; implementation details are in the .cpp file
struct Header;

class Hyphenator {
public:
    // Note: this will also require a locale, for proper case folding behavior
    static Hyphenator* load(const uint16_t* patternData, size_t size);

    // Compute the hyphenation of a word, storing the hyphenation in result vector. Each
    // entry in the vector is a "hyphen edit" to be applied at the corresponding code unit
    // offset in the word. Currently 0 means no hyphen and 1 means insert hyphen and break,
    // but this will be expanded to other edits for nonstandard hyphenation.
    // Example: word is "hyphen", result is [0 0 1 0 0 0], corresponding to "hy-phen".
    void hyphenate(std::vector<uint8_t>* result, const uint16_t* word, size_t len);

    // Create a hyphenator from pattern data in binary format, as described in
    // doc/hyb_file_format.md. The caller is responsible for ensuring that the lifetime of
    // the pattern data is at least as long as the Hyphenator object.
    // Note: nullptr is valid input, in which case the hyphenator only processes soft hyphens.
    static Hyphenator* loadBinary(const uint8_t* patternData);

private:
    // apply soft hyphens only, ignoring patterns
    void hyphenateSoft(uint8_t* result, const uint16_t* word, size_t len);

    // try looking up word in alphabet table, return false if any code units fail to map.
    // Note that this method writes len+2 entries into alpha_codes (including start and stop).
    bool alphabetLookup(uint16_t* alpha_codes, const uint16_t* word, size_t len);

    // calculate hyphenation from patterns, assuming alphabet lookup has already been done
    void hyphenateFromCodes(uint8_t* result, const uint16_t* codes, size_t len);

    // TODO: these should become parameters, as they might vary by locale, screen size, and
    // possibly explicit user control.
    static const int MIN_PREFIX = 2;
    static const int MIN_SUFFIX = 3;

    // See also LONGEST_HYPHENATED_WORD in LineBreaker.cpp. Here the constant is used so
    // that temporary buffers can be stack-allocated without waste, which is a slightly
    // different use case. It measures UTF-16 code units.
    static const size_t MAX_HYPHENATED_SIZE = 64;

    // Raw binary pattern data; not owned by this object (see the lifetime note on
    // loadBinary).
    const uint8_t* patternData;

    // Accessor for binary data: reinterprets the start of the pattern data as the hyb
    // file header.
    // NOTE(review): loadBinary accepts nullptr; if that pointer ends up in patternData the
    // result here is null and must not be dereferenced -- confirm against the .cpp.
    const Header* getHeader() const {
        return reinterpret_cast<const Header*>(patternData);
    }
};

}  // namespace android

#endif  // MINIKIN_HYPHENATOR_H