• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright 2020 the V8 project authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #ifndef V8_INTL_SUPPORT
6 #error Internationalization is expected to be enabled.
7 #endif  // V8_INTL_SUPPORT
8 
9 #include "src/objects/js-segments.h"
10 
11 #include <map>
12 #include <memory>
13 #include <string>
14 
15 #include "src/execution/isolate.h"
16 #include "src/heap/factory.h"
17 #include "src/objects/intl-objects.h"
18 #include "src/objects/js-segment-iterator.h"
19 #include "src/objects/js-segmenter-inl.h"
20 #include "src/objects/js-segments-inl.h"
21 #include "src/objects/managed.h"
22 #include "src/objects/objects-inl.h"
23 #include "unicode/brkiter.h"
24 
25 namespace v8 {
26 namespace internal {
27 
28 // ecma402 #sec-createsegmentsobject
Create(Isolate * isolate,Handle<JSSegmenter> segmenter,Handle<String> string)29 MaybeHandle<JSSegments> JSSegments::Create(Isolate* isolate,
30                                            Handle<JSSegmenter> segmenter,
31                                            Handle<String> string) {
32   icu::BreakIterator* break_iterator =
33       segmenter->icu_break_iterator().raw()->clone();
34   DCHECK_NOT_NULL(break_iterator);
35 
36   Handle<Managed<icu::UnicodeString>> unicode_string =
37       Intl::SetTextToBreakIterator(isolate, string, break_iterator);
38   Handle<Managed<icu::BreakIterator>> managed_break_iterator =
39       Managed<icu::BreakIterator>::FromRawPtr(isolate, 0, break_iterator);
40 
41   // 1. Let internalSlotsList be « [[SegmentsSegmenter]], [[SegmentsString]] ».
42   // 2. Let segments be ! ObjectCreate(%Segments.prototype%, internalSlotsList).
43   Handle<Map> map(isolate->native_context()->intl_segments_map(), isolate);
44   Handle<JSObject> result = isolate->factory()->NewJSObjectFromMap(map);
45 
46   Handle<JSSegments> segments = Handle<JSSegments>::cast(result);
47   segments->set_flags(0);
48 
49   // 3. Set segments.[[SegmentsSegmenter]] to segmenter.
50   segments->set_icu_break_iterator(*managed_break_iterator);
51   segments->set_granularity(segmenter->granularity());
52 
53   // 4. Set segments.[[SegmentsString]] to string.
54   segments->set_unicode_string(*unicode_string);
55 
56   // 5. Return segments.
57   return segments;
58 }
59 
60 // ecma402 #sec-%segmentsprototype%.containing
Containing(Isolate * isolate,Handle<JSSegments> segments,double n_double)61 MaybeHandle<Object> JSSegments::Containing(Isolate* isolate,
62                                            Handle<JSSegments> segments,
63                                            double n_double) {
64   // 5. Let len be the length of string.
65   int32_t len = segments->unicode_string().raw()->length();
66 
67   // 7. If n < 0 or n ≥ len, return undefined.
68   if (n_double < 0 || n_double >= len) {
69     return isolate->factory()->undefined_value();
70   }
71 
72   int32_t n = static_cast<int32_t>(n_double);
73   // n may point to the surrogate tail- adjust it back to the lead.
74   n = segments->unicode_string().raw()->getChar32Start(n);
75 
76   icu::BreakIterator* break_iterator = segments->icu_break_iterator().raw();
77   // 8. Let startIndex be ! FindBoundary(segmenter, string, n, before).
78   int32_t start_index =
79       break_iterator->isBoundary(n) ? n : break_iterator->preceding(n);
80 
81   // 9. Let endIndex be ! FindBoundary(segmenter, string, n, after).
82   int32_t end_index = break_iterator->following(n);
83 
84   // 10. Return ! CreateSegmentDataObject(segmenter, string, startIndex,
85   // endIndex).
86   return CreateSegmentDataObject(
87       isolate, segments->granularity(), break_iterator,
88       *(segments->unicode_string().raw()), start_index, end_index);
89 }
90 
91 namespace {
92 
CurrentSegmentIsWordLike(icu::BreakIterator * break_iterator)93 bool CurrentSegmentIsWordLike(icu::BreakIterator* break_iterator) {
94   int32_t rule_status = break_iterator->getRuleStatus();
95   return (rule_status >= UBRK_WORD_NUMBER &&
96           rule_status < UBRK_WORD_NUMBER_LIMIT) ||
97          (rule_status >= UBRK_WORD_LETTER &&
98           rule_status < UBRK_WORD_LETTER_LIMIT) ||
99          (rule_status >= UBRK_WORD_KANA &&
100           rule_status < UBRK_WORD_KANA_LIMIT) ||
101          (rule_status >= UBRK_WORD_IDEO && rule_status < UBRK_WORD_IDEO_LIMIT);
102 }
103 
104 }  // namespace
105 
106 // ecma402 #sec-createsegmentdataobject
CreateSegmentDataObject(Isolate * isolate,JSSegmenter::Granularity granularity,icu::BreakIterator * break_iterator,const icu::UnicodeString & string,int32_t start_index,int32_t end_index)107 MaybeHandle<Object> JSSegments::CreateSegmentDataObject(
108     Isolate* isolate, JSSegmenter::Granularity granularity,
109     icu::BreakIterator* break_iterator, const icu::UnicodeString& string,
110     int32_t start_index, int32_t end_index) {
111   Factory* factory = isolate->factory();
112 
113   // 1. Let len be the length of string.
114   // 2. Assert: startIndex ≥ 0.
115   DCHECK_GE(start_index, 0);
116   // 3. Assert: endIndex ≤ len.
117   DCHECK_LE(end_index, string.length());
118   // 4. Assert: startIndex < endIndex.
119   DCHECK_LT(start_index, end_index);
120 
121   // 5. Let result be ! ObjectCreate(%ObjectPrototype%).
122   Handle<JSObject> result = factory->NewJSObject(isolate->object_function());
123 
124   // 6. Let segment be the String value equal to the substring of string
125   // consisting of the code units at indices startIndex (inclusive) through
126   // endIndex (exclusive).
127   Handle<String> segment;
128   ASSIGN_RETURN_ON_EXCEPTION(
129       isolate, segment, Intl::ToString(isolate, string, start_index, end_index),
130       JSObject);
131 
132   // 7. Perform ! CreateDataPropertyOrThrow(result, "segment", segment).
133   Maybe<bool> maybe_create_segment = JSReceiver::CreateDataProperty(
134       isolate, result, factory->segment_string(), segment, Just(kDontThrow));
135   DCHECK(maybe_create_segment.FromJust());
136   USE(maybe_create_segment);
137 
138   // 8. Perform ! CreateDataPropertyOrThrow(result, "index", startIndex).
139   Maybe<bool> maybe_create_index = JSReceiver::CreateDataProperty(
140       isolate, result, factory->index_string(),
141       factory->NewNumberFromInt(start_index), Just(kDontThrow));
142   DCHECK(maybe_create_index.FromJust());
143   USE(maybe_create_index);
144 
145   // 9. Perform ! CreateDataPropertyOrThrow(result, "input", string).
146   Handle<String> input_string;
147   ASSIGN_RETURN_ON_EXCEPTION(isolate, input_string,
148                              Intl::ToString(isolate, string), JSObject);
149   Maybe<bool> maybe_create_input = JSReceiver::CreateDataProperty(
150       isolate, result, factory->input_string(), input_string, Just(kDontThrow));
151   DCHECK(maybe_create_input.FromJust());
152   USE(maybe_create_input);
153 
154   Handle<Object> is_word_like;
155   // 10. Let granularity be segmenter.[[SegmenterGranularity]].
156   // 11. If granularity is "word", then
157   if (granularity == JSSegmenter::Granularity::WORD) {
158     // a. Let isWordLike be a Boolean value indicating whether the word segment
159     //    segment in string is "word-like" according to locale
160     //    segmenter.[[Locale]].
161     is_word_like = CurrentSegmentIsWordLike(break_iterator)
162                        ? factory->true_value()
163                        : factory->false_value();
164     // b. Perform ! CreateDataPropertyOrThrow(result, "isWordLike", isWordLike).
165     Maybe<bool> maybe_create_is_word_like = JSReceiver::CreateDataProperty(
166         isolate, result, factory->isWordLike_string(), is_word_like,
167         Just(kDontThrow));
168     DCHECK(maybe_create_is_word_like.FromJust());
169     USE(maybe_create_is_word_like);
170   }
171   return result;
172 }
173 
GranularityAsString(Isolate * isolate) const174 Handle<String> JSSegments::GranularityAsString(Isolate* isolate) const {
175   return JSSegmenter::GetGranularityString(isolate, granularity());
176 }
177 
178 }  // namespace internal
179 }  // namespace v8
180