1 /*
2 * Copyright (c) 2024 Huawei Device Co., Ltd.
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at
6 *
7 * http://www.apache.org/licenses/LICENSE-2.0
8 *
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
14 */
15 #ifdef OHOS_SUPPORT
16 #include <algorithm>
17 #ifdef _WIN32
18 #include <cstdlib>
19 #else
20 #include <cstddef>
21 #include <climits>
22 #endif
23 #include <iostream>
24 #include <fstream>
25 #include <unicode/utf.h>
26 #include <unicode/utf8.h>
27 #include <unordered_set>
28
29 #include "include/Hyphenator.h"
30 #include "log.h"
31 #include "trace.h"
32
33 namespace skia {
34 namespace textlayout {
// Out-of-class definition of the static Hyphenator::initFlag member.
std::once_flag Hyphenator::initFlag;
// Maps a lower-case language code to the file name of its hyphenation pattern (.hpb) file.
// initTrieTree() copies every entry of this table into the lookup trie.
const std::map<std::string, std::string> HPB_FILE_NAMES = {
    {"as", "hyph-as.hpb"}, // Assamese
    {"be", "hyph-be.hpb"}, // Belarusian
    {"bg", "hyph-bg.hpb"}, // Bulgarian
    {"bn", "hyph-bn.hpb"}, // Bengali
    {"cs", "hyph-cs.hpb"}, // Czech
    {"cy", "hyph-cy.hpb"}, // Welsh
    {"da", "hyph-da.hpb"}, // Danish
    {"de-1996", "hyph-de-1996.hpb"}, // German, 1996 orthography
    {"de-1901", "hyph-de-1901.hpb"}, // German, 1901 orthography
    {"de-ch-1901", "hyph-de-ch-1901.hpb"}, // Swiss German, 1901 orthography
    {"el-monoton", "hyph-el-monoton.hpb"}, // Modern Greek, monotonic
    {"el-polyton", "hyph-el-polyton.hpb"}, // Modern Greek, polytonic
    {"en-latn", "hyph-en-gb.hpb"}, // Latin English (shares the British English patterns)
    {"en-gb", "hyph-en-gb.hpb"}, // British English
    {"en-us", "hyph-en-us.hpb"}, // American English
    {"es", "hyph-es.hpb"}, // Spanish
    {"et", "hyph-et.hpb"}, // Estonian
    {"fr", "hyph-fr.hpb"}, // French
    {"ga", "hyph-ga.hpb"}, // Irish
    {"gl", "hyph-gl.hpb"}, // Galician
    {"gu", "hyph-gu.hpb"}, // Gujarati
    {"hi", "hyph-hi.hpb"}, // Hindi
    {"hr", "hyph-hr.hpb"}, // Croatian
    {"hu", "hyph-hu.hpb"}, // Hungarian
    {"hy", "hyph-hy.hpb"}, // Armenian
    {"id", "hyph-id.hpb"}, // Indonesian
    {"is", "hyph-is.hpb"}, // Icelandic
    {"it", "hyph-it.hpb"}, // Italian
    {"ka", "hyph-ka.hpb"}, // Georgian
    {"kn", "hyph-kn.hpb"}, // Kannada
    {"la", "hyph-la.hpb"}, // Latin
    {"lt", "hyph-lt.hpb"}, // Lithuanian
    {"lv", "hyph-lv.hpb"}, // Latvian
    {"mk", "hyph-mk.hpb"}, // Macedonian
    {"ml", "hyph-ml.hpb"}, // Malayalam
    {"mn-cyrl", "hyph-mn-cyrl.hpb"}, // Mongolian, Cyrillic script
    {"mr", "hyph-mr.hpb"}, // Marathi
    {"mul-ethi", "hyph-mul-ethi.hpb"}, // Ethiopic
    {"nl", "hyph-nl.hpb"}, // Dutch
    {"or", "hyph-or.hpb"}, // Oriya
    {"pa", "hyph-pa.hpb"}, // Punjabi
    {"pl", "hyph-pl.hpb"}, // Polish
    {"pt", "hyph-pt.hpb"}, // Portuguese
    {"rm", "hyph-rm.hpb"}, // Romansh
    {"ru", "hyph-ru.hpb"}, // Russian
    {"sh-cyrl", "hyph-sh-cyrl.hpb"}, // Serbo-Croatian, Cyrillic script
    {"sh-latn", "hyph-sh-latn.hpb"}, // Serbo-Croatian, Latin script
    {"sk", "hyph-sk.hpb"}, // Slovak
    {"sl", "hyph-sl.hpb"}, // Slovenian
    {"sr-cyrl", "hyph-sr-cyrl.hpb"}, // Serbian, Cyrillic script
    {"sv", "hyph-sv.hpb"}, // Swedish
    {"ta", "hyph-ta.hpb"}, // Tamil
    {"te", "hyph-te.hpb"}, // Telugu
    {"th", "hyph-th.hpb"}, // Thai
    {"tk", "hyph-tk.hpb"}, // Turkmen
    {"tr", "hyph-tr.hpb"}, // Turkish
    {"uk", "hyph-uk.hpb"}, // Ukrainian
    {"pinyin", "hyph-zh-latn-pinyin.hpb"}, // Chinese, Pinyin. NOTE: the language code 'pinyin' is not correct and will be fixed later
};
96
// ASCII punctuation that is not hyphenated: formatTarget() strips these characters
// from the end of a word before the patterns are applied.
const std::unordered_set<uint16_t> EXCLUDED_WORD_ENDING_CHARS = {
    0x21, // !
    0x22, // "
    0x23, // #
    0x24, // $
    0x25, // %
    0x26, // &
    0x27, // '
    0x28, // (
    0x29, // )
    0x2A, // *
    0x2B, // +
    0x2C, // ,
    0x2D, // -
    0x2E, // .
    0x2F, // /
    0x3A, // :
    0x3B, // ;
    0x3C, // <
    0x3D, // =
    0x3E, // >
    0x3F, // ?
};
121
122 struct HyphenTableInfo {
123 const HyphenatorHeader* header{nullptr};
124 const uint32_t* maindict{nullptr};
125 const ArrayOf16bits* mappings{nullptr};
126
initHyphenTableInfoskia::textlayout::HyphenTableInfo127 bool initHyphenTableInfo(const std::vector<uint8_t>& hyphenatorData)
128 {
129 if (hyphenatorData.size() < sizeof(HyphenatorHeader)) {
130 return false;
131 }
132 header = reinterpret_cast<const HyphenatorHeader*>(hyphenatorData.data());
133 // get master table, it always is in direct mode
134 maindict = (uint32_t*)(hyphenatorData.data() + header->toc);
135 mappings = reinterpret_cast<const ArrayOf16bits*>(hyphenatorData.data() + header->mappings);
136 // this is actually beyond the real 32 bit address, but just to have an offset that
137 // is clearly out of bounds without recalculating it again
138 return !(header->minCp == header->maxCp && mappings->count == 0);
139 }
140 };
141
142 struct HyphenSubTable {
143 uint16_t* staticOffset{nullptr};
144 uint32_t nextOffset{0};
145 PathType type{PathType::PATTERN};
146
initHyphenSubTableInfoskia::textlayout::HyphenSubTable147 bool initHyphenSubTableInfo(uint16_t& code, uint16_t& offset, HyphenTableInfo& hyphenInfo)
148 {
149 auto header = hyphenInfo.header;
150 if (offset == header->maxCount(hyphenInfo.mappings)) {
151 code = 0;
152 return false;
153 }
154
155 uint32_t baseOffset = *(hyphenInfo.maindict + offset - 1); // previous entry end
156 uint32_t initialValue = *(hyphenInfo.maindict + offset);
157 this->type = (PathType)(initialValue >> HYPHEN_SHIFT_BITS_30);
158 // direct and pairs need to have offset different from zero
159 if (initialValue == 0 && (type == PathType::DIRECT || type == PathType::PAIRS)) {
160 return false;
161 }
162 // base offset is 16 bit
163 auto address = reinterpret_cast<const uint8_t*>(header);
164 this->staticOffset = (uint16_t*)(address + HYPHEN_BASE_CODE_SHIFT * baseOffset);
165
166 // get a subtable according character
167 // once: read as 32bit, the rest of the access will be 16bit (13bit for offsets)
168 this->nextOffset = (initialValue & 0x3fffffff);
169 return true;
170 }
171 };
172
173 struct HyphenFindBreakParam {
174 const HyphenatorHeader* header{nullptr};
175 HyphenSubTable hyphenSubTable;
176 uint16_t code{0};
177 uint16_t offset{0};
178 };
179
ReadBinaryFile(const std::string & filePath,std::vector<uint8_t> & buffer)180 void ReadBinaryFile(const std::string& filePath, std::vector<uint8_t>& buffer)
181 {
182 char tmpPath[PATH_MAX] = {0};
183 if (filePath.size() > PATH_MAX) {
184 TEXT_LOGE("File name is too long");
185 return;
186 }
187 #ifdef _WIN32
188 auto canonicalFilePath = _fullpath(tmpPath, filePath.c_str(), sizeof(tmpPath));
189 #else
190 auto canonicalFilePath = realpath(filePath.c_str(), tmpPath);
191 #endif
192 if (canonicalFilePath == nullptr) {
193 TEXT_LOGE("Invalid file %{public}s", filePath.c_str());
194 return;
195 }
196 std::ifstream file(canonicalFilePath, std::ifstream::binary);
197 if (!file.is_open()) {
198 TEXT_LOGE("Failed to open %{public}s", filePath.c_str());
199 return;
200 }
201
202 file.seekg(0, std::ios::end);
203 std::streamsize length = file.tellg();
204 file.seekg(0, std::ios::beg);
205
206 buffer.resize(length);
207 if (!file.read(reinterpret_cast<char*>(buffer.data()), length)) {
208 TEXT_LOGE("Failed to read %{public}s", filePath.c_str());
209 }
210 file.close();
211 }
212
// Returns the lower-cased locale truncated just before its hyphenPos-th '-'.
// When the locale has fewer than hyphenPos hyphens, or hyphenPos is not positive,
// the whole lower-cased locale is returned.
// Example: ("de-CH-1901", 2) -> "de-ch", ("de-CH-1901", 1) -> "de".
std::string getLanguageCode(std::string locale, int hyphenPos)
{
    // tolower() is only defined for values representable as unsigned char (or EOF),
    // so convert each char first instead of passing possibly negative values
    std::transform(locale.begin(), locale.end(), locale.begin(),
        [](unsigned char ch) { return static_cast<char>(::tolower(ch)); });

    // locate the hyphenPos-th '-'
    size_t pos = std::string::npos;
    size_t searchFrom = 0;
    for (int seen = 0; seen < hyphenPos; ++seen) {
        pos = locale.find('-', searchFrom);
        if (pos == std::string::npos) {
            break;
        }
        searchFrom = pos + 1;
    }

    if (pos == std::string::npos) {
        return locale;
    }
    return locale.substr(0, pos);
}
237
initTrieTree()238 void Hyphenator::initTrieTree()
239 {
240 for (const auto& item : HPB_FILE_NAMES) {
241 fTrieTree.insert(item.first, item.second);
242 }
243 }
244
getHyphenatorData(const std::string & locale)245 const std::vector<uint8_t>& Hyphenator::getHyphenatorData(const std::string& locale)
246 {
247 const std::vector<uint8_t>& firstResult =
248 findHyphenatorData(getLanguageCode(locale, 2)); //num 2:sub string locale to the second '-'
249 if (!firstResult.empty()) {
250 return firstResult;
251 } else {
252 return findHyphenatorData(getLanguageCode(locale, 1));
253 }
254 }
255
findHyphenatorData(const std::string & langCode)256 const std::vector<uint8_t>& Hyphenator::findHyphenatorData(const std::string& langCode)
257 {
258 {
259 std::shared_lock<std::shared_mutex> readLock(mutex_);
260 auto search = fHyphenMap.find(langCode);
261 if (search != fHyphenMap.end()) {
262 return search->second;
263 }
264 }
265
266 return loadPatternFile(langCode);
267 }
268
loadPatternFile(const std::string & langCode)269 const std::vector<uint8_t>& Hyphenator::loadPatternFile(const std::string& langCode)
270 {
271 std::unique_lock<std::shared_mutex> writeLock(mutex_);
272 auto search = fHyphenMap.find(langCode);
273 if (search != fHyphenMap.end()) {
274 return search->second;
275 }
276 std::string hpbFileName = fTrieTree.findPartialMatch(langCode);
277 if (!hpbFileName.empty()) {
278 std::string filename = "/system/usr/ohos_hyphen_data/" + hpbFileName;
279 std::vector<uint8_t> fileBuffer;
280 ReadBinaryFile(filename, fileBuffer);
281 if (!fileBuffer.empty()) {
282 fHyphenMap.emplace(langCode, std::move(fileBuffer));
283 return fHyphenMap[langCode];
284 }
285 }
286 return fEmptyResult;
287 }
288
formatTarget(std::vector<uint16_t> & target)289 void formatTarget(std::vector<uint16_t>& target)
290 {
291 while (EXCLUDED_WORD_ENDING_CHARS.find(target.back()) != EXCLUDED_WORD_ENDING_CHARS.end()) {
292 target.pop_back();
293 if (target.empty()) {
294 // nothing to be hyphenated
295 return;
296 }
297 }
298 target.insert(target.cbegin(), '.');
299 target.push_back('.');
300
301 for (auto& code : target) {
302 HyphenatorHeader::toLower(code);
303 }
304 }
305
processPattern(const Pattern * p,size_t count,uint32_t index,std::vector<uint16_t> & word,std::vector<uint8_t> & res)306 void processPattern(const Pattern* p, size_t count, uint32_t index, std::vector<uint16_t>& word,
307 std::vector<uint8_t>& res)
308 {
309 TEXT_LOGD("Index:%{public}u", index);
310 if (count > 0) {
311 count *= 0x4; // patterns are padded to 4 byte arrays
312 // when we reach pattern node (leaf), we need to increase index by one because of our
313 // own code offset
314 for (size_t currentIndex = 0; index < res.size() && currentIndex < count; index++) {
315 TEXT_LOGD("Pattern info:%{public}zu, %{public}u, 0x%{public}x", count, index, p->patterns[currentIndex]);
316 res[index] = std::max(res[index], (p->patterns[currentIndex]));
317 currentIndex++;
318 }
319 }
320 }
321
processLinear(uint16_t * data,size_t index,HyphenFindBreakParam & param,std::vector<uint16_t> & word,std::vector<uint8_t> & res)322 void processLinear(uint16_t* data, size_t index, HyphenFindBreakParam& param, std::vector<uint16_t>& word,
323 std::vector<uint8_t>& res)
324 {
325 TEXT_LOGD("Index:%{public}zu", index);
326 const ArrayOf16bits* p = reinterpret_cast<const ArrayOf16bits*>(data);
327 uint16_t count = p->count;
328 if (count > index + 1) {
329 // the pattern is longer than the remaining word
330 return;
331 }
332 index--;
333
334 // check the rest of the string
335 for (auto j = 0; j < count; j++) {
336 if (p->codes[j] != word[index]) {
337 return;
338 } else {
339 index--;
340 }
341 }
342 uint32_t offset = 1 + count; // array size, code points, no padding for 16 bit
343 uint16_t pOffset = *(data + offset);
344 offset++; // move forward, after pattern
345 if (pOffset == 0) {
346 return;
347 }
348
349 const Pattern* matchPattern =
350 reinterpret_cast<const Pattern*>(reinterpret_cast<const uint8_t*>(param.header) + (pOffset & 0xfff));
351 index++; // matching peeks ahead
352 processPattern(matchPattern, (pOffset >> 0xc), index, word, res);
353 if (*(data + offset) != 0) { // peek if there is more to come
354 return processLinear(data + offset, index, param, word, res);
355 }
356 }
357
processDirect(uint16_t * data,HyphenFindBreakParam & param,uint32_t & nextOffset,PathType & type)358 bool processDirect(uint16_t* data, HyphenFindBreakParam& param, uint32_t& nextOffset, PathType& type)
359 {
360 TEXT_LOGD("");
361 param.offset = param.header->codeOffset(param.code);
362 if (param.header->minCp != param.header->maxCp && param.offset > param.header->maxCp) {
363 return false;
364 }
365 uint16_t nextValue = *(data + nextOffset + param.offset);
366 nextOffset = nextValue & 0x3fff; // Use mask 0x3fff to extract the lower 14 bits of nextValue
367 type = (PathType)(nextValue >> HYPHEN_SHIFT_BITS_14);
368 return true;
369 }
370
processPairs(const ArrayOf16bits * data,HyphenFindBreakParam & param,uint16_t code,uint32_t & nextOffset,PathType & type)371 bool processPairs(const ArrayOf16bits* data, HyphenFindBreakParam& param, uint16_t code, uint32_t& nextOffset,
372 PathType& type)
373 {
374 TEXT_LOGD("Code:0x%{public}x", code);
375 uint16_t count = data->count;
376 bool match = false;
377 for (size_t j = 0; j < count; j += HYPHEN_BASE_CODE_SHIFT) {
378 if (data->codes[j] == code) {
379 nextOffset = data->codes[j + 1] & 0x3fff;
380 type = (PathType)(data->codes[j + 1] >> HYPHEN_SHIFT_BITS_14);
381 match = true;
382 break;
383 } else if (data->codes[j] > code) {
384 break;
385 }
386 }
387 return match;
388 }
389
findBreakByType(HyphenFindBreakParam & param,const size_t & targetIndex,std::vector<uint16_t> & target,std::vector<uint8_t> & result)390 void findBreakByType(HyphenFindBreakParam& param, const size_t& targetIndex, std::vector<uint16_t>& target,
391 std::vector<uint8_t>& result)
392 {
393 TEXT_LOGD("TopLevel:%{public}zu", targetIndex);
394 auto [staticOffset, nextOffset, type] = param.hyphenSubTable;
395 uint32_t index = 0; // used in inner loop to traverse path further (backwards)
396 while (true) {
397 TEXT_LOGD("Loop:%{public}zu %{public}u", targetIndex, index);
398 // there is always at 16bit of pattern address before next node data
399 uint16_t pOffset = *(staticOffset + nextOffset);
400 // from binary version 2 onwards, we have common nodes with 16bit offset (not bound to code points)
401 if (type == PathType::PATTERN && (param.header->version >> 0x18) > 1) {
402 pOffset =
403 *(reinterpret_cast<const uint16_t*>(param.header) + nextOffset + (param.header->version & 0xffff));
404 }
405 nextOffset++;
406 if (pOffset > 0) {
407 // if we have reached pattern, apply it to result
408 uint16_t count = (pOffset >> 0xc);
409 pOffset = 0xfff & pOffset;
410 auto p = reinterpret_cast<const Pattern*>(reinterpret_cast<const uint8_t*>(param.header) + pOffset);
411 processPattern(p, count, targetIndex - index, target, result);
412 }
413 if (type == PathType::PATTERN) {
414 // just break the loop
415 break;
416 } else if (type == PathType::DIRECT) {
417 if (index == targetIndex) {
418 break;
419 }
420 index++; // resolve new code point (on the left)
421 param.code = target[targetIndex - index];
422 if (!processDirect(staticOffset, param, nextOffset, type)) {
423 break;
424 }
425 } else if (type == PathType::LINEAR) {
426 processLinear((staticOffset + nextOffset), targetIndex - index, param, target, result);
427 // when a linear element has been processed, we always break and move to next top level index
428 break;
429 } else {
430 if (index == targetIndex) {
431 break;
432 }
433 index++;
434 auto p = reinterpret_cast<const ArrayOf16bits*>(staticOffset + nextOffset);
435 if (!processPairs(p, param, target[targetIndex - index], nextOffset, type)) {
436 break;
437 }
438 }
439 }
440 }
441
findBreaks(const std::vector<uint8_t> & hyphenatorData,std::vector<uint16_t> & target,std::vector<uint8_t> & result)442 void findBreaks(const std::vector<uint8_t>& hyphenatorData, std::vector<uint16_t>& target, std::vector<uint8_t>& result)
443 {
444 HyphenTableInfo hyphenInfo;
445 if (!hyphenInfo.initHyphenTableInfo(hyphenatorData)) {
446 return;
447 }
448
449 if (target.size() > 0) {
450 for (size_t i = target.size() - 1; i >= 1; --i) {
451 HyphenSubTable hyphenSubTable;
452 auto header = hyphenInfo.header;
453 auto code = target[i];
454 auto offset = header->codeOffset(code, hyphenInfo.mappings);
455 if (!hyphenSubTable.initHyphenSubTableInfo(code, offset, hyphenInfo)) {
456 continue;
457 }
458 HyphenFindBreakParam param{header, hyphenSubTable, code, offset};
459 findBreakByType(param, i, target, result);
460 }
461 }
462 }
463
// Returns how many leading entries of the marker-padded word must not receive a break:
// the language's minimum leading length (1 for the locales listed below, 2 otherwise)
// plus one for the '.' marker in front of the word. The locale is matched exactly.
size_t getLanguagespecificLeadingBounds(const std::string& locale)
{
    static const std::unordered_set<std::string> specialLocales = {"ka", "hy", "pinyin", "el-monoton", "el-polyton"};
    constexpr size_t frontMarker = 1;  // we pad the target with surrounding marks ('.')
    constexpr size_t defaultLead = 2;  // hardcoded for the most of the language pattern files
    constexpr size_t specialLead = 1;

    const bool isSpecial = specialLocales.find(locale) != specialLocales.end();
    return (isSpecial ? specialLead : defaultLead) + frontMarker;
}
473
// Returns the language's minimum number of trailing characters: 3 or 1 for the locales
// listed below, 2 for every other locale. The locale is matched exactly. No extra
// entry is added for the end marker, because a break is placed before a position.
size_t getLanguagespecificTrailingBounds(const std::string& locale)
{
    static const std::unordered_set<std::string> threeCharLocales = {"en-gb", "et", "th", "pt", "ga",
                                                                     "cs", "cy", "sk", "en-us"};
    static const std::unordered_set<std::string> oneCharLocales = {"el-monoton", "el-polyton"};

    if (threeCharLocales.find(locale) != threeCharLocales.end()) {
        return 3; // 3: At least three characters
    }
    if (oneCharLocales.find(locale) != oneCharLocales.end()) {
        return 1;
    }
    return 2; // hardcoded for the most of the language pattern files
}
488
formatResult(std::vector<uint8_t> & result,const size_t & leadingHyphmins,const size_t & trailingHyphmins,std::vector<uint8_t> & offsets)489 inline void formatResult(std::vector<uint8_t>& result, const size_t& leadingHyphmins, const size_t& trailingHyphmins,
490 std::vector<uint8_t>& offsets)
491 {
492 if (result.size() < leadingHyphmins || result.size() <= trailingHyphmins) {
493 // Not meeting the requirements
494 return;
495 }
496 for (size_t i = 0; i < leadingHyphmins; i++) {
497 result[i] = 0;
498 }
499
500 // remove front marker
501 result.erase(result.cbegin());
502
503 // move indices to match input multi chars
504 size_t pad = 0;
505 for (size_t i = 0; i < offsets.size(); i++) {
506 while (offsets[i] != 0) {
507 result.insert(result.begin() + i + pad, result[i + pad]);
508 TEXT_LOGD("Padding %{public}zu", i + pad);
509 offsets[i]--;
510 pad++;
511 }
512 }
513 // remove end marker and uncertain results
514 result.erase(result.cbegin() + result.size() - trailingHyphmins, result.cend());
515 }
516
findBreakPositions(const SkString & locale,const SkString & text,size_t startPos,size_t endPos)517 std::vector<uint8_t> Hyphenator::findBreakPositions(const SkString& locale, const SkString& text, size_t startPos,
518 size_t endPos)
519 {
520 TEXT_TRACE_FUNC();
521 TEXT_LOGD("Find break pos:%{public}zu %{public}zu %{public}zu", text.size(), startPos, endPos);
522 const std::string dummy(locale.c_str());
523 auto hyphenatorData = getHyphenatorData(dummy);
524 std::vector<uint8_t> result;
525
526 if (startPos > text.size() || endPos > text.size() || startPos > endPos) {
527 TEXT_LOGE("Hyphen error pos %{public}zu %{public}zu %{public}zu", text.size(), startPos, endPos);
528 return result;
529 }
530 const auto leadingHyphmins = getLanguagespecificLeadingBounds(dummy);
531 const auto trailingHyphmins = getLanguagespecificTrailingBounds(dummy);
532 // resolve potential break positions
533 if (!hyphenatorData.empty() && startPos + std::max(leadingHyphmins, trailingHyphmins) <= endPos) {
534 // typically need to have at least 4 characters for hyphenator to process
535 const auto lastword = std::string(text.c_str() + startPos, text.c_str() + endPos);
536 std::vector<uint16_t> word;
537 std::vector<uint8_t> offsets;
538 int32_t i = 0;
539 const int32_t textLength = static_cast<int32_t>(endPos - startPos);
540 UChar32 c = 0;
541 int32_t prev = i;
542 while (i < textLength) {
543 U8_NEXT(reinterpret_cast<const uint8_t*>(lastword.c_str()), i, textLength, c);
544 offsets.push_back(i - prev - U16_LENGTH(c));
545 if (U16_LENGTH(c) == 1) {
546 word.push_back(c);
547 } else {
548 word.push_back(U16_LEAD(c));
549 word.push_back(U16_TRAIL(c));
550 }
551 prev = i;
552 }
553
554 formatTarget(word);
555 if (word.size() > 3) { // 3: At least four characters, like '.ab.'
556 // Bulgarian pattern file tells only the positions where
557 // breaking is not allowed, we need to initialize defaults to allow breaking
558 const uint8_t defaultValue = (dummy == "bg") ? 1 : 0; // 0: break is not allowed, 1: break level 1
559 result.resize(word.size(), defaultValue);
560 findBreaks(hyphenatorData, word, result);
561 formatResult(result, leadingHyphmins, trailingHyphmins, offsets);
562 }
563 }
564 return result;
565 }
566 } // namespace textlayout
567 } // namespace skia
568 #endif
569