1 // Copyright 2018 the V8 project authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #ifndef V8_INTL_SUPPORT
6 #error Internationalization is expected to be enabled.
7 #endif // V8_INTL_SUPPORT
8
9 #include "src/objects/js-break-iterator.h"
10
11 #include "src/objects/intl-objects.h"
12 #include "src/objects/js-break-iterator-inl.h"
13 #include "src/objects/managed-inl.h"
14 #include "src/objects/option-utils.h"
15 #include "unicode/brkiter.h"
16
17 namespace v8 {
18 namespace internal {
19
20 namespace {
// Kinds of text segmentation a JSV8BreakIterator can perform. These correspond
// to the values accepted for the "type" option ("character", "word",
// "sentence" and "line").
enum class Type {
  CHARACTER,
  WORD,
  SENTENCE,
  LINE
};
22 } // anonymous namespace
23
New(Isolate * isolate,Handle<Map> map,Handle<Object> locales,Handle<Object> options_obj,const char * service)24 MaybeHandle<JSV8BreakIterator> JSV8BreakIterator::New(
25 Isolate* isolate, Handle<Map> map, Handle<Object> locales,
26 Handle<Object> options_obj, const char* service) {
27 Factory* factory = isolate->factory();
28
29 // 1. Let requestedLocales be ? CanonicalizeLocaleList(locales).
30 Maybe<std::vector<std::string>> maybe_requested_locales =
31 Intl::CanonicalizeLocaleList(isolate, locales);
32 MAYBE_RETURN(maybe_requested_locales, MaybeHandle<JSV8BreakIterator>());
33 std::vector<std::string> requested_locales =
34 maybe_requested_locales.FromJust();
35
36 Handle<JSReceiver> options;
37 if (options_obj->IsUndefined(isolate)) {
38 options = factory->NewJSObjectWithNullProto();
39 } else {
40 ASSIGN_RETURN_ON_EXCEPTION(isolate, options,
41 Object::ToObject(isolate, options_obj, service),
42 JSV8BreakIterator);
43 }
44
45 // Extract locale string
46 Maybe<Intl::MatcherOption> maybe_locale_matcher =
47 Intl::GetLocaleMatcher(isolate, options, service);
48 MAYBE_RETURN(maybe_locale_matcher, MaybeHandle<JSV8BreakIterator>());
49 Intl::MatcherOption matcher = maybe_locale_matcher.FromJust();
50
51 Maybe<Intl::ResolvedLocale> maybe_resolve_locale =
52 Intl::ResolveLocale(isolate, JSV8BreakIterator::GetAvailableLocales(),
53 requested_locales, matcher, {});
54 if (maybe_resolve_locale.IsNothing()) {
55 THROW_NEW_ERROR(isolate, NewRangeError(MessageTemplate::kIcuError),
56 JSV8BreakIterator);
57 }
58 Intl::ResolvedLocale r = maybe_resolve_locale.FromJust();
59
60 // Extract type from options
61 Maybe<Type> maybe_type = GetStringOption<Type>(
62 isolate, options, "type", service,
63 {"word", "character", "sentence", "line"},
64 {Type::WORD, Type::CHARACTER, Type::SENTENCE, Type::LINE}, Type::WORD);
65 MAYBE_RETURN(maybe_type, MaybeHandle<JSV8BreakIterator>());
66 Type type_enum = maybe_type.FromJust();
67
68 icu::Locale icu_locale = r.icu_locale;
69 DCHECK(!icu_locale.isBogus());
70
71 // Construct break_iterator using icu_locale and type
72 UErrorCode status = U_ZERO_ERROR;
73 std::unique_ptr<icu::BreakIterator> break_iterator = nullptr;
74 switch (type_enum) {
75 case Type::CHARACTER:
76 break_iterator.reset(
77 icu::BreakIterator::createCharacterInstance(icu_locale, status));
78 break;
79 case Type::SENTENCE:
80 break_iterator.reset(
81 icu::BreakIterator::createSentenceInstance(icu_locale, status));
82 break;
83 case Type::LINE:
84 isolate->CountUsage(
85 v8::Isolate::UseCounterFeature::kBreakIteratorTypeLine);
86 break_iterator.reset(
87 icu::BreakIterator::createLineInstance(icu_locale, status));
88 break;
89 default:
90 isolate->CountUsage(
91 v8::Isolate::UseCounterFeature::kBreakIteratorTypeWord);
92 break_iterator.reset(
93 icu::BreakIterator::createWordInstance(icu_locale, status));
94 break;
95 }
96
97 // Error handling for break_iterator
98 if (U_FAILURE(status) || break_iterator.get() == nullptr) {
99 THROW_NEW_ERROR(isolate, NewRangeError(MessageTemplate::kIcuError),
100 JSV8BreakIterator);
101 }
102 isolate->CountUsage(v8::Isolate::UseCounterFeature::kBreakIterator);
103
104 // Construct managed objects from pointers
105 Handle<Managed<icu::BreakIterator>> managed_break_iterator =
106 Managed<icu::BreakIterator>::FromUniquePtr(isolate, 0,
107 std::move(break_iterator));
108 Handle<Managed<icu::UnicodeString>> managed_unicode_string =
109 Managed<icu::UnicodeString>::FromRawPtr(isolate, 0, nullptr);
110
111 Handle<String> locale_str =
112 isolate->factory()->NewStringFromAsciiChecked(r.locale.c_str());
113
114 // Now all properties are ready, so we can allocate the result object.
115 Handle<JSV8BreakIterator> break_iterator_holder =
116 Handle<JSV8BreakIterator>::cast(
117 isolate->factory()->NewFastOrSlowJSObjectFromMap(map));
118 DisallowGarbageCollection no_gc;
119 break_iterator_holder->set_locale(*locale_str);
120 break_iterator_holder->set_break_iterator(*managed_break_iterator);
121 break_iterator_holder->set_unicode_string(*managed_unicode_string);
122
123 // Return break_iterator_holder
124 return break_iterator_holder;
125 }
126
127 namespace {
128
GetType(icu::BreakIterator * break_iterator)129 Type GetType(icu::BreakIterator* break_iterator) {
130 // Since the developer calling the Intl.v8BreakIterator already know the type,
131 // we usually do not need to know the type unless the resolvedOptions() is
132 // called, we use the following trick to figure out the type instead of
133 // storing it with the JSV8BreakIterator object to save memory.
134 // This routine is not fast but should be seldomly used only.
135
136 // We need to clone a copy of break iteator because we need to setText to it.
137 std::unique_ptr<icu::BreakIterator> cloned_break_iterator(
138 break_iterator->clone());
139 // Use a magic string "He is." to call next().
140 // character type: will return 1 for "H"
141 // word type: will return 2 for "He"
142 // line type: will return 3 for "He "
143 // sentence type: will return 6 for "He is."
144 icu::UnicodeString data("He is.");
145 cloned_break_iterator->setText(data);
146 switch (cloned_break_iterator->next()) {
147 case 1: // After "H"
148 return Type::CHARACTER;
149 case 2: // After "He"
150 return Type::WORD;
151 case 3: // After "He "
152 return Type::LINE;
153 case 6: // After "He is."
154 return Type::SENTENCE;
155 default:
156 UNREACHABLE();
157 }
158 }
159
TypeAsString(Isolate * isolate,Type type)160 Handle<String> TypeAsString(Isolate* isolate, Type type) {
161 switch (type) {
162 case Type::CHARACTER:
163 return ReadOnlyRoots(isolate).character_string_handle();
164 case Type::WORD:
165 return ReadOnlyRoots(isolate).word_string_handle();
166 case Type::SENTENCE:
167 return ReadOnlyRoots(isolate).sentence_string_handle();
168 case Type::LINE:
169 return ReadOnlyRoots(isolate).line_string_handle();
170 }
171 UNREACHABLE();
172 }
173
174 } // anonymous namespace
175
ResolvedOptions(Isolate * isolate,Handle<JSV8BreakIterator> break_iterator)176 Handle<JSObject> JSV8BreakIterator::ResolvedOptions(
177 Isolate* isolate, Handle<JSV8BreakIterator> break_iterator) {
178 Factory* factory = isolate->factory();
179
180 Type type = GetType(break_iterator->break_iterator().raw());
181
182 Handle<JSObject> result = factory->NewJSObject(isolate->object_function());
183 Handle<String> locale(break_iterator->locale(), isolate);
184
185 JSObject::AddProperty(isolate, result, factory->locale_string(), locale,
186 NONE);
187 JSObject::AddProperty(isolate, result, factory->type_string(),
188 TypeAsString(isolate, type), NONE);
189 return result;
190 }
191
AdoptText(Isolate * isolate,Handle<JSV8BreakIterator> break_iterator_holder,Handle<String> text)192 void JSV8BreakIterator::AdoptText(
193 Isolate* isolate, Handle<JSV8BreakIterator> break_iterator_holder,
194 Handle<String> text) {
195 icu::BreakIterator* break_iterator =
196 break_iterator_holder->break_iterator().raw();
197 DCHECK_NOT_NULL(break_iterator);
198 Handle<Managed<icu::UnicodeString>> unicode_string =
199 Intl::SetTextToBreakIterator(isolate, text, break_iterator);
200 break_iterator_holder->set_unicode_string(*unicode_string);
201 }
202
Current(Isolate * isolate,Handle<JSV8BreakIterator> break_iterator)203 Handle<Object> JSV8BreakIterator::Current(
204 Isolate* isolate, Handle<JSV8BreakIterator> break_iterator) {
205 return isolate->factory()->NewNumberFromInt(
206 break_iterator->break_iterator().raw()->current());
207 }
208
First(Isolate * isolate,Handle<JSV8BreakIterator> break_iterator)209 Handle<Object> JSV8BreakIterator::First(
210 Isolate* isolate, Handle<JSV8BreakIterator> break_iterator) {
211 return isolate->factory()->NewNumberFromInt(
212 break_iterator->break_iterator().raw()->first());
213 }
214
Next(Isolate * isolate,Handle<JSV8BreakIterator> break_iterator)215 Handle<Object> JSV8BreakIterator::Next(
216 Isolate* isolate, Handle<JSV8BreakIterator> break_iterator) {
217 return isolate->factory()->NewNumberFromInt(
218 break_iterator->break_iterator().raw()->next());
219 }
220
BreakType(Isolate * isolate,Handle<JSV8BreakIterator> break_iterator)221 String JSV8BreakIterator::BreakType(Isolate* isolate,
222 Handle<JSV8BreakIterator> break_iterator) {
223 int32_t status = break_iterator->break_iterator().raw()->getRuleStatus();
224 // Keep return values in sync with JavaScript BreakType enum.
225 if (status >= UBRK_WORD_NONE && status < UBRK_WORD_NONE_LIMIT) {
226 return ReadOnlyRoots(isolate).none_string();
227 }
228 if (status >= UBRK_WORD_NUMBER && status < UBRK_WORD_NUMBER_LIMIT) {
229 return ReadOnlyRoots(isolate).number_string();
230 }
231 if (status >= UBRK_WORD_LETTER && status < UBRK_WORD_LETTER_LIMIT) {
232 return ReadOnlyRoots(isolate).letter_string();
233 }
234 if (status >= UBRK_WORD_KANA && status < UBRK_WORD_KANA_LIMIT) {
235 return ReadOnlyRoots(isolate).kana_string();
236 }
237 if (status >= UBRK_WORD_IDEO && status < UBRK_WORD_IDEO_LIMIT) {
238 return ReadOnlyRoots(isolate).ideo_string();
239 }
240 return ReadOnlyRoots(isolate).unknown_string();
241 }
242
GetAvailableLocales()243 const std::set<std::string>& JSV8BreakIterator::GetAvailableLocales() {
244 return Intl::GetAvailableLocales();
245 }
246
247 } // namespace internal
248 } // namespace v8
249