1 // Copyright 2019 Google LLC.
2 // Use of this source code is governed by a BSD-style license that can be found in the LICENSE file.
3
4 #include "include/core/SkTypes.h"
5 #include "include/private/SkTemplates.h"
6 #include "modules/skplaintexteditor/src/word_boundaries.h"
7
8 #include <unicode/ubrk.h>
9 #include <unicode/utext.h>
10 #include <unicode/utypes.h>
11 #include <memory>
12
13
14 namespace {
15 template <typename T,typename P,P* p> using resource = std::unique_ptr<T, SkFunctionWrapper<P, p>>;
16 using ICUBrk = resource<UBreakIterator, decltype(ubrk_close) , ubrk_close >;
17 using ICUUText = resource<UText , decltype(utext_close) , utext_close >;
18 } // namespace
19
GetUtf8WordBoundaries(const char * begin,size_t byteCount,const char * locale)20 std::vector<bool> GetUtf8WordBoundaries(const char* begin, size_t byteCount, const char* locale) {
21 std::vector<bool> result;
22 if (0 == byteCount) {
23 return result;
24 }
25 result.resize(byteCount);
26
27 UErrorCode status = U_ZERO_ERROR;
28 UText sUtf8UText = UTEXT_INITIALIZER;
29 ICUUText utf8UText(utext_openUTF8(&sUtf8UText, begin, byteCount, &status));
30 if (U_FAILURE(status)) {
31 SkDebugf("Could not create utf8UText: %s", u_errorName(status));
32 return result;
33 }
34
35 ICUBrk wordBreakIterator(ubrk_open(UBRK_WORD, locale, nullptr, 0, &status));
36 if (!wordBreakIterator || U_FAILURE(status)) {
37 SkDEBUGF("Could not create line break iterator: %s", u_errorName(status));
38 return result;
39 }
40
41 ubrk_setUText(&*wordBreakIterator, utf8UText.get(), &status);
42 if (U_FAILURE(status)) {
43 SkDebugf("Could not setText on break iterator: %s", u_errorName(status));
44 return result;
45 }
46
47 int32_t pos = ubrk_first(&*wordBreakIterator);
48 while (pos != UBRK_DONE) {
49 if ((size_t)pos < byteCount) {
50 result[pos] = true;
51 }
52 pos = ubrk_next(&*wordBreakIterator);
53 }
54 return result;
55 }
56