• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (c) 2024 Huawei Device Co., Ltd.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at
6  *
7  *     http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 #ifdef OHOS_SUPPORT
16 #include <algorithm>
17 #ifdef _WIN32
18 #include <cstdlib>
19 #else
20 #include <cstddef>
21 #include <climits>
22 #endif
23 #include <iostream>
24 #include <fstream>
25 #include <unicode/utf.h>
26 #include <unicode/utf8.h>
27 #include <unordered_set>
28 
29 #include "include/Hyphenator.h"
30 #include "log.h"
31 
32 namespace skia {
33 namespace textlayout {
// Out-of-class definition of the static std::once_flag member of Hyphenator.
34 std::once_flag Hyphenator::initFlag;
// Maps a lower-case language code to the file name of its hyphenation pattern (.hpb) file.
// initTrieTree() inserts every entry into fTrieTree; loadPatternFile() prepends the
// pattern directory to the file name it looks up there.
35 const std::map<std::string, std::string> HPB_FILE_NAMES = {
36     {"as", "hyph-as.hpb"},                 // Assamese
37     {"be", "hyph-be.hpb"},                 // Belarusian
38     {"bg", "hyph-bg.hpb"},                 // Bulgarian
39     {"bn", "hyph-bn.hpb"},                 // Bengali
40     {"cs", "hyph-cs.hpb"},                 // Czech
41     {"cy", "hyph-cy.hpb"},                 // Welsh
42     {"da", "hyph-da.hpb"},                 // Danish
43     {"de-1996", "hyph-de-1996.hpb"},       // German, 1996 orthography
44     {"de-1901", "hyph-de-1901.hpb"},       // German, 1901 orthography
45     {"de-ch-1901", "hyph-de-ch-1901.hpb"}, // Swiss German, 1901 orthography
46     {"el-monoton", "hyph-el-monoton.hpb"}, // Modern Greek, monotonic
47     {"el-polyton", "hyph-el-polyton.hpb"}, // Modern Greek, polytonic
48     {"en-latn", "hyph-en-gb.hpb"},         // Latin-script English (uses the British English patterns)
49     {"en-gb", "hyph-en-gb.hpb"},           // British English
50     {"en-us", "hyph-en-us.hpb"},           // American English
51     {"es", "hyph-es.hpb"},                 // Spanish
52     {"et", "hyph-et.hpb"},                 // Estonian
53     {"fr", "hyph-fr.hpb"},                 // French
54     {"ga", "hyph-ga.hpb"},                 // Irish
55     {"gl", "hyph-gl.hpb"},                 // Galician
56     {"gu", "hyph-gu.hpb"},                 // Gujarati
57     {"hi", "hyph-hi.hpb"},                 // Hindi
58     {"hr", "hyph-hr.hpb"},                 // Croatian
59     {"hu", "hyph-hu.hpb"},                 // Hungarian
60     {"hy", "hyph-hy.hpb"},                 // Armenian
61     {"id", "hyph-id.hpb"},                 // Indonesian
62     {"is", "hyph-is.hpb"},                 // Icelandic
63     {"it", "hyph-it.hpb"},                 // Italian
64     {"ka", "hyph-ka.hpb"},                 // Georgian
65     {"kn", "hyph-kn.hpb"},                 // Kannada
66     {"la", "hyph-la.hpb"},                 // Latin
67     {"lt", "hyph-lt.hpb"},                 // Lithuanian
68     {"lv", "hyph-lv.hpb"},                 // Latvian
69     {"mk", "hyph-mk.hpb"},                 // Macedonian
70     {"ml", "hyph-ml.hpb"},                 // Malayalam
71     {"mn-cyrl", "hyph-mn-cyrl.hpb"},       // Mongolian, Cyrillic script
72     {"mr", "hyph-mr.hpb"},                 // Marathi
73     {"mul-ethi", "hyph-mul-ethi.hpb"},     // Ethiopic
74     {"nl", "hyph-nl.hpb"},                 // Dutch
75     {"or", "hyph-or.hpb"},                 // Oriya
76     {"pa", "hyph-pa.hpb"},                 // Punjabi
77     {"pl", "hyph-pl.hpb"},                 // Polish
78     {"pt", "hyph-pt.hpb"},                 // Portuguese
79     {"rm", "hyph-rm.hpb"},                 // Romansh
80     {"ru", "hyph-ru.hpb"},                 // Russian
81     {"sh-cyrl", "hyph-sh-cyrl.hpb"},       // Serbo-Croatian, Cyrillic script
82     {"sh-latn", "hyph-sh-latn.hpb"},       // Serbo-Croatian, Latin script
83     {"sk", "hyph-sk.hpb"},                 // Slovak
84     {"sl", "hyph-sl.hpb"},                 // Slovenian
85     {"sr-cyrl", "hyph-sr-cyrl.hpb"},       // Serbian, Cyrillic script
86     {"sv", "hyph-sv.hpb"},                 // Swedish
87     {"ta", "hyph-ta.hpb"},                 // Tamil
88     {"te", "hyph-te.hpb"},                 // Telugu
89     {"th", "hyph-th.hpb"},                 // Thai
90     {"tk", "hyph-tk.hpb"},                 // Turkmen
91     {"tr", "hyph-tr.hpb"},                 // Turkish
92     {"uk", "hyph-uk.hpb"},                 // Ukrainian
93     {"pinyin", "hyph-zh-latn-pinyin.hpb"}, // Chinese, Pinyin. NOTE: 'pinyin' is not a correct language code; to be fixed later
94 };
95 
96 // Trailing characters that formatTarget() strips from the end of a word before the
// hyphenation patterns are applied. All entries are ASCII punctuation code units.
97 const std::unordered_set<uint16_t> EXCLUDED_WORD_ENDING_CHARS = {
98     0x21, // !
99     0x22, // "
100     0x23, // #
101     0x24, // $
102     0x25, // %
103     0x26, // &
104     0x27, // '
105     0x28, // (
106     0x29, // )
107     0x2A, // *
108     0x2B, // +
109     0x2C, // ,
110     0x2D, // -
111     0x2e, // .
112     0x2f, // /
113     0x3A, // :
114     0x3b, // ;
115     0x3C, // <
116     0x3D, // =
117     0x3E, // >
118     0x3F  // ?
119 };
120 
121 struct HyphenTableInfo {
122     const HyphenatorHeader* header{nullptr};
123     const uint32_t* maindict{nullptr};
124     const ArrayOf16bits* mappings{nullptr};
125 
initHyphenTableInfoskia::textlayout::HyphenTableInfo126     bool initHyphenTableInfo(const std::vector<uint8_t>& hyphenatorData)
127     {
128         if (hyphenatorData.size() < sizeof(HyphenatorHeader)) {
129             return false;
130         }
131         header = reinterpret_cast<const HyphenatorHeader*>(hyphenatorData.data());
132         // get master table, it always is in direct mode
133         maindict = (uint32_t*)(hyphenatorData.data() + header->toc);
134         mappings = reinterpret_cast<const ArrayOf16bits*>(hyphenatorData.data() + header->mappings);
135         // this is actually beyond the real 32 bit address, but just to have an offset that
136         // is clearly out of bounds without recalculating it again
137         return !(header->minCp == header->maxCp && mappings->count == 0);
138     }
139 };
140 
141 struct HyphenSubTable {
142     uint16_t* staticOffset{nullptr};
143     uint32_t nextOffset{0};
144     PathType type{PathType::PATTERN};
145 
initHyphenSubTableInfoskia::textlayout::HyphenSubTable146     bool initHyphenSubTableInfo(uint16_t& code, uint16_t& offset, HyphenTableInfo& hyphenInfo)
147     {
148         auto header = hyphenInfo.header;
149         if (offset == header->maxCount(hyphenInfo.mappings)) {
150             code = 0;
151             return false;
152         }
153 
154         uint32_t baseOffset = *(hyphenInfo.maindict + offset - 1); // previous entry end
155         uint32_t initialValue = *(hyphenInfo.maindict + offset);
156         this->type = (PathType)(initialValue >> HYPHEN_SHIFT_BITS_30);
157         // direct and pairs need to have offset different from zero
158         if (initialValue == 0 && (type == PathType::DIRECT || type == PathType::PAIRS)) {
159             return false;
160         }
161         // base offset is 16 bit
162         auto address = reinterpret_cast<const uint8_t*>(header);
163         this->staticOffset = (uint16_t*)(address + HYPHEN_BASE_CODE_SHIFT * baseOffset);
164 
165         // get a subtable according character
166         // once: read as 32bit, the rest of the access will be 16bit (13bit for offsets)
167         this->nextOffset = (initialValue & 0x3fffffff);
168         return true;
169     }
170 };
171 
172 struct HyphenFindBreakParam {
173     const HyphenatorHeader* header{nullptr};
174     HyphenSubTable hyphenSubTable;
175     uint16_t code{0};
176     uint16_t offset{0};
177 };
178 
ReadBinaryFile(const std::string & filePath,std::vector<uint8_t> & buffer)179 void ReadBinaryFile(const std::string& filePath, std::vector<uint8_t>& buffer)
180 {
181     char tmpPath[PATH_MAX] = {0};
182     if (filePath.size() > PATH_MAX) {
183         TEXT_LOGE("File name is too long");
184         return;
185     }
186 #ifdef _WIN32
187     auto canonicalFilePath = _fullpath(tmpPath, filePath.c_str(), sizeof(tmpPath));
188 #else
189     auto canonicalFilePath = realpath(filePath.c_str(), tmpPath);
190 #endif
191     if (canonicalFilePath == nullptr) {
192         TEXT_LOGE("Invalid file %{public}s", filePath.c_str());
193         return;
194     }
195     std::ifstream file(canonicalFilePath, std::ifstream::binary);
196     if (!file.is_open()) {
197         TEXT_LOGE("Failed to open %{public}s", filePath.c_str());
198         return;
199     }
200 
201     file.seekg(0, std::ios::end);
202     std::streamsize length = file.tellg();
203     file.seekg(0, std::ios::beg);
204 
205     buffer.resize(length);
206     if (!file.read(reinterpret_cast<char*>(buffer.data()), length)) {
207         TEXT_LOGE("Failed to read %{public}s", filePath.c_str());
208     }
209     file.close();
210 }
211 
// Lower-cases `locale` and cuts it just before its `hyphenPos`-th '-' (1-based).
// Examples: ("de-CH-1901", 2) -> "de-ch", ("de-CH-1901", 1) -> "de".
// When the locale has fewer than `hyphenPos` hyphens (or hyphenPos <= 0) the whole
// lower-cased locale is returned.
std::string getLanguageCode(std::string locale, int hyphenPos)
{
    // tolower() has undefined behaviour for negative arguments, so every char goes
    // through unsigned char first (matters for non-ASCII bytes where char is signed)
    std::transform(locale.begin(), locale.end(), locale.begin(),
                   [](unsigned char ch) { return static_cast<char>(::tolower(ch)); });

    int hyphensSeen = 0;
    for (size_t pos = locale.find('-'); pos != std::string::npos; pos = locale.find('-', pos + 1)) {
        if (++hyphensSeen == hyphenPos) {
            return locale.substr(0, pos);
        }
    }
    return locale;
}
236 
initTrieTree()237 void Hyphenator::initTrieTree()
238 {
239     for (const auto& item : HPB_FILE_NAMES) {
240         fTrieTree.insert(item.first, item.second);
241     }
242 }
243 
getHyphenatorData(const std::string & locale)244 const std::vector<uint8_t>& Hyphenator::getHyphenatorData(const std::string& locale)
245 {
246     const std::vector<uint8_t>& firstResult =
247         findHyphenatorData(getLanguageCode(locale, 2)); //num 2:sub string locale to the second '-'
248     if (!firstResult.empty()) {
249         return firstResult;
250     } else {
251         return findHyphenatorData(getLanguageCode(locale, 1));
252     }
253 }
254 
findHyphenatorData(const std::string & langCode)255 const std::vector<uint8_t>& Hyphenator::findHyphenatorData(const std::string& langCode)
256 {
257     {
258         std::shared_lock<std::shared_mutex> readLock(mutex_);
259         auto search = fHyphenMap.find(langCode);
260         if (search != fHyphenMap.end()) {
261             return search->second;
262         }
263     }
264 
265     return loadPatternFile(langCode);
266 }
267 
loadPatternFile(const std::string & langCode)268 const std::vector<uint8_t>& Hyphenator::loadPatternFile(const std::string& langCode)
269 {
270     std::unique_lock<std::shared_mutex> writeLock(mutex_);
271     auto search = fHyphenMap.find(langCode);
272     if (search != fHyphenMap.end()) {
273         return search->second;
274     }
275     std::string hpbFileName = fTrieTree.findPartialMatch(langCode);
276     if (!hpbFileName.empty()) {
277         std::string filename = "/system/usr/ohos_hyphen_data/" + hpbFileName;
278         std::vector<uint8_t> fileBuffer;
279         ReadBinaryFile(filename, fileBuffer);
280         if (!fileBuffer.empty()) {
281             fHyphenMap.emplace(langCode, std::move(fileBuffer));
282             return fHyphenMap[langCode];
283         }
284     }
285     return fEmptyResult;
286 }
287 
formatTarget(std::vector<uint16_t> & target)288 void formatTarget(std::vector<uint16_t>& target)
289 {
290     while (EXCLUDED_WORD_ENDING_CHARS.find(target.back()) != EXCLUDED_WORD_ENDING_CHARS.end()) {
291         target.pop_back();
292     }
293     target.insert(target.cbegin(), '.');
294     target.push_back('.');
295 
296     for (auto& code : target) {
297         HyphenatorHeader::toLower(code);
298     }
299 }
300 
processPattern(const Pattern * p,size_t count,uint32_t index,std::vector<uint16_t> & word,std::vector<uint8_t> & res)301 void processPattern(const Pattern* p, size_t count, uint32_t index, std::vector<uint16_t>& word,
302                     std::vector<uint8_t>& res)
303 {
304     TEXT_LOGD("Index:%{public}u", index);
305     if (count > 0) {
306         count *= 0x4; // patterns are padded to 4 byte arrays
307         // when we reach pattern node (leaf), we need to increase index by one because of our
308         // own code offset
309         for (size_t currentIndex = 0; index < res.size() && currentIndex < count; index++) {
310             TEXT_LOGD("Pattern info:%{public}zu, %{public}u, 0x%{public}x", count, index, p->patterns[currentIndex]);
311             res[index] = std::max(res[index], (p->patterns[currentIndex]));
312             currentIndex++;
313         }
314     }
315 }
316 
processLinear(uint16_t * data,size_t index,HyphenFindBreakParam & param,std::vector<uint16_t> & word,std::vector<uint8_t> & res)317 void processLinear(uint16_t* data, size_t index, HyphenFindBreakParam& param, std::vector<uint16_t>& word,
318                    std::vector<uint8_t>& res)
319 {
320     TEXT_LOGD("Index:%{public}zu", index);
321     const ArrayOf16bits* p = reinterpret_cast<const ArrayOf16bits*>(data);
322     uint16_t count = p->count;
323     if (count > index + 1) {
324         // the pattern is longer than the remaining word
325         return;
326     }
327     index--;
328 
329     // check the rest of the string
330     for (auto j = 0; j < count; j++) {
331         if (p->codes[j] != word[index]) {
332             return;
333         } else {
334             index--;
335         }
336     }
337     uint32_t offset = 1 + count; // array size, code points, no padding for 16 bit
338     uint16_t pOffset = *(data + offset);
339     offset++; // move forward, after pattern
340     if (!pOffset) {
341         return;
342     }
343 
344     const Pattern* matchPattern =
345         reinterpret_cast<const Pattern*>(reinterpret_cast<const uint8_t*>(param.header) + (pOffset & 0xfff));
346     index++; // matching peeks ahead
347     processPattern(matchPattern, (pOffset >> 0xc), index, word, res);
348     if (*(data + offset) != 0) { // peek if there is more to come
349         return processLinear(data + offset, index, param, word, res);
350     }
351 }
352 
processDirect(uint16_t * data,HyphenFindBreakParam & param,uint32_t & nextOffset,PathType & type)353 bool processDirect(uint16_t* data, HyphenFindBreakParam& param, uint32_t& nextOffset, PathType& type)
354 {
355     TEXT_LOGD("");
356     param.offset = param.header->codeOffset(param.code);
357     if (param.header->minCp != param.header->maxCp && param.offset > param.header->maxCp) {
358         return false;
359     }
360     uint16_t nextValue = *(data + nextOffset + param.offset);
361     nextOffset = nextValue & 0x3fff; // Use mask 0x3fff to extract the lower 14 bits of nextValue
362     type = (PathType)(nextValue >> HYPHEN_SHIFT_BITS_14);
363     return true;
364 }
365 
processPairs(const ArrayOf16bits * data,HyphenFindBreakParam & param,uint16_t code,uint32_t & nextOffset,PathType & type)366 bool processPairs(const ArrayOf16bits* data, HyphenFindBreakParam& param, uint16_t code, uint32_t& nextOffset,
367                   PathType& type)
368 {
369     TEXT_LOGD("Code:0x%{public}x", code);
370     uint16_t count = data->count;
371     bool match = false;
372     for (size_t j = 0; j < count; j += HYPHEN_BASE_CODE_SHIFT) {
373         if (data->codes[j] == code) {
374             nextOffset = data->codes[j + 1] & 0x3fff;
375             type = (PathType)(data->codes[j + 1] >> HYPHEN_SHIFT_BITS_14);
376             match = true;
377             break;
378         } else if (data->codes[j] > code) {
379             break;
380         }
381     }
382     return match;
383 }
384 
findBreakByType(HyphenFindBreakParam & param,const size_t & targetIndex,std::vector<uint16_t> & target,std::vector<uint8_t> & result)385 void findBreakByType(HyphenFindBreakParam& param, const size_t& targetIndex, std::vector<uint16_t>& target,
386                      std::vector<uint8_t>& result)
387 {
388     TEXT_LOGD("TopLevel:%{public}zu", targetIndex);
389     auto [staticOffset, nextOffset, type] = param.hyphenSubTable;
390     uint32_t index = 0; // used in inner loop to traverse path further (backwards)
391     while (true) {
392         TEXT_LOGD("Loop:%{public}zu %{public}u", targetIndex, index);
393         // there is always at 16bit of pattern address before next node data
394         uint16_t pOffset = *(staticOffset + nextOffset);
395         // from binary version 2 onwards, we have common nodes with 16bit offset (not bound to code points)
396         if (type == PathType::PATTERN && (param.header->version >> 0x18) > 1) {
397             pOffset =
398                 *(reinterpret_cast<const uint16_t*>(param.header) + nextOffset + (param.header->version & 0xffff));
399         }
400         nextOffset++;
401         if (pOffset) {
402             // if we have reached pattern, apply it to result
403             uint16_t count = (pOffset >> 0xc);
404             pOffset = 0xfff & pOffset;
405             auto p = reinterpret_cast<const Pattern*>(reinterpret_cast<const uint8_t*>(param.header) + pOffset);
406             processPattern(p, count, targetIndex - index, target, result);
407         }
408         if (type == PathType::PATTERN) {
409             // just break the loop
410             break;
411         } else if (type == PathType::DIRECT) {
412             if (index == targetIndex) {
413                 break;
414             }
415             index++; // resolve new code point (on the left)
416             param.code = target[targetIndex - index];
417             if (!processDirect(staticOffset, param, nextOffset, type)) {
418                 break;
419             }
420         } else if (type == PathType::LINEAR) {
421             processLinear((staticOffset + nextOffset), targetIndex - index, param, target, result);
422             // when a linear element has been processed, we always break and move to next top level index
423             break;
424         } else {
425             if (index == targetIndex) {
426                 break;
427             }
428             index++;
429             auto p = reinterpret_cast<const ArrayOf16bits*>(staticOffset + nextOffset);
430             if (!processPairs(p, param, target[targetIndex - index], nextOffset, type)) {
431                 break;
432             }
433         }
434     }
435 }
436 
findBreaks(const std::vector<uint8_t> & hyphenatorData,std::vector<uint16_t> & target,std::vector<uint8_t> & result)437 void findBreaks(const std::vector<uint8_t>& hyphenatorData, std::vector<uint16_t>& target, std::vector<uint8_t>& result)
438 {
439     HyphenTableInfo hyphenInfo;
440     if (!hyphenInfo.initHyphenTableInfo(hyphenatorData)) {
441         return;
442     }
443 
444     if (target.size() > 0) {
445         for (size_t i = target.size() - 1; i >= 1; --i) {
446             HyphenSubTable hyphenSubTable;
447             auto header = hyphenInfo.header;
448             auto code = target[i];
449             auto offset = header->codeOffset(code, hyphenInfo.mappings);
450             if (!hyphenSubTable.initHyphenSubTableInfo(code, offset, hyphenInfo)) {
451                 continue;
452             }
453             HyphenFindBreakParam param{header, hyphenSubTable, code, offset};
454             findBreakByType(param, i, target, result);
455         }
456     }
457 }
458 
// Returns how many leading entries of the padded word must not carry a break.
// Most locales keep 2 characters before the first break; the listed locales keep 1.
// One more is added for the '.' marker placed in front of the word. The locale is
// matched exactly (case-sensitive).
size_t getLanguagespecificLeadingBounds(const std::string& locale)
{
    static const std::unordered_set<std::string> oneCharLeadLocales = {"ka", "hy", "pinyin", "el-monoton",
                                                                       "el-polyton"};
    constexpr size_t frontMarker = 1;
    const bool shortLead = oneCharLeadLocales.find(locale) != oneCharLeadLocales.end();
    const size_t lead = shortLead ? 1 : 2;
    return lead + frontMarker;
}
468 
// Returns how many characters must remain after the last break: 3 or 1 for the listed
// locales, 2 for everything else. Nothing is added for the end marker because a break
// sits in front of its position. The locale is matched exactly (case-sensitive).
size_t getLanguagespecificTrailingBounds(const std::string& locale)
{
    static const std::unordered_set<std::string> threeCharLocales = {"en-gb", "et", "th", "pt", "ga",
                                                                     "cs", "cy", "sk", "en-us"};
    static const std::unordered_set<std::string> oneCharLocales = {"el-monoton", "el-polyton"};

    if (threeCharLocales.find(locale) != threeCharLocales.end()) {
        return 3; // 3: At least three characters
    }
    if (oneCharLocales.find(locale) != oneCharLocales.end()) {
        return 1;
    }
    return 2; // hardcoded for the most of the language pattern files
}
483 
formatResult(std::vector<uint8_t> & result,const size_t & leadingHyphmins,const size_t & trailingHyphmins,std::vector<uint8_t> & offsets)484 inline void formatResult(std::vector<uint8_t>& result, const size_t& leadingHyphmins, const size_t& trailingHyphmins,
485                          std::vector<uint8_t>& offsets)
486 {
487     for (size_t i = 0; i < leadingHyphmins; i++) {
488         result[i] = 0;
489     }
490 
491     // remove front marker
492     result.erase(result.cbegin());
493 
494     // move indices to match input multi chars
495     size_t pad = 0;
496     for (size_t i = 0; i < offsets.size(); i++) {
497         while (offsets[i] != 0) {
498             result.insert(result.begin() + i + pad, result[i + pad]);
499             TEXT_LOGD("Padding %{public}zu", i + pad);
500             offsets[i]--;
501             pad++;
502         }
503     }
504     // remove end marker and uncertain results
505     result.erase(result.cbegin() + result.size() - trailingHyphmins, result.cend());
506 }
507 
findBreakPositions(const SkString & locale,const SkString & text,size_t startPos,size_t endPos)508 std::vector<uint8_t> Hyphenator::findBreakPositions(const SkString& locale, const SkString& text, size_t startPos,
509                                                     size_t endPos)
510 {
511     TEXT_LOGD("Find break pos:%{public}zu %{public}zu %{public}zu", text.size(), startPos, endPos);
512     const std::string dummy(locale.c_str());
513     auto hyphenatorData = getHyphenatorData(dummy);
514     std::vector<uint8_t> result;
515 
516     if (startPos > text.size() || endPos > text.size() || startPos > endPos) {
517         TEXT_LOGE("Hyphen error pos %{public}zu %{public}zu %{public}zu", text.size(), startPos, endPos);
518         return result;
519     }
520     const auto leadingHyphmins = getLanguagespecificLeadingBounds(dummy);
521     const auto trailingHyphmins = getLanguagespecificTrailingBounds(dummy);
522     // resolve potential break positions
523     if (!hyphenatorData.empty() && startPos + std::max(leadingHyphmins, trailingHyphmins) <= endPos) {
524         // typically need to have at least 4 characters for hyphenator to process
525         const auto lastword = std::string(text.c_str() + startPos, text.c_str() + endPos);
526         std::vector<uint16_t> word;
527         std::vector<uint8_t> offsets;
528         int32_t i = 0;
529         const int32_t textLength = static_cast<int32_t>(endPos - startPos);
530         UChar32 c = 0;
531         int32_t prev = i;
532         while (i < textLength) {
533             U8_NEXT(reinterpret_cast<const uint8_t*>(lastword.c_str()), i, textLength, c);
534             offsets.push_back(i - prev - U16_LENGTH(c));
535             if (U16_LENGTH(c) == 1) {
536                 word.push_back(c);
537             } else {
538                 word.push_back(U16_LEAD(c));
539                 word.push_back(U16_TRAIL(c));
540             }
541             prev = i;
542         }
543 
544         formatTarget(word);
545         // Bulgarian pattern file tells only the positions where
546         // breaking is not allowed, we need to initialize defaults to allow breaking
547         const uint8_t defaultValue = (dummy == "bg") ? 1 : 0; // 0: break is not allowed, 1: break level 1
548         result.resize(word.size(), defaultValue);
549         findBreaks(hyphenatorData, word, result);
550         formatResult(result, leadingHyphmins, trailingHyphmins, offsets);
551     }
552     return result;
553 }
554 } // namespace textlayout
555 } // namespace skia
556 #endif
557