1 /*
2 * Copyright (c) 2023 Shenzhen Kaihong Digital Industry Development Co., Ltd.
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at
6 *
7 * http://www.apache.org/licenses/LICENSE-2.0
8 *
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
14 */
15
16 #include "ecmascript/js_segments.h"
17
18
19 #include "ecmascript/base/builtins_base.h"
20 #include "ecmascript/intl/locale_helper.h"
21 #include "ecmascript/object_factory-inl.h"
22
23 namespace panda::ecmascript {
24
SetIcuBreakIterator(JSThread * thread,const JSHandle<JSSegments> & segments,icu::BreakIterator * icuBreakIterator,const NativePointerCallback & callback)25 void JSSegments::SetIcuBreakIterator(JSThread *thread, const JSHandle<JSSegments> &segments,
26 icu::BreakIterator* icuBreakIterator, const NativePointerCallback &callback)
27 {
28 EcmaVM *ecmaVm = thread->GetEcmaVM();
29 ObjectFactory *factory = ecmaVm->GetFactory();
30
31 ASSERT(icuBreakIterator != nullptr);
32 JSTaggedValue data = segments->GetIcuField();
33 if (data.IsJSNativePointer()) {
34 JSNativePointer *native = JSNativePointer::Cast(data.GetTaggedObject());
35 native->ResetExternalPointer(thread, icuBreakIterator);
36 return;
37 }
38 JSHandle<JSNativePointer> pointer = factory->NewJSNativePointer(icuBreakIterator, callback);
39 segments->SetIcuField(thread, pointer.GetTaggedValue());
40 }
41
SetUString(JSThread * thread,const JSHandle<JSSegments> & segments,icu::UnicodeString * icuUnicodeString,const NativePointerCallback & callback)42 void JSSegments::SetUString(JSThread *thread, const JSHandle<JSSegments> &segments,
43 icu::UnicodeString* icuUnicodeString, const NativePointerCallback &callback)
44 {
45 EcmaVM *ecmaVm = thread->GetEcmaVM();
46 ObjectFactory *factory = ecmaVm->GetFactory();
47
48 ASSERT(icuUnicodeString != nullptr);
49 JSTaggedValue data = segments->GetUnicodeString();
50 if (data.IsJSNativePointer()) {
51 JSNativePointer *native = JSNativePointer::Cast(data.GetTaggedObject());
52 native->ResetExternalPointer(thread, icuUnicodeString);
53 return;
54 }
55 JSHandle<JSNativePointer> pointer = factory->NewJSNativePointer(icuUnicodeString, callback);
56 segments->SetUnicodeString(thread, pointer.GetTaggedValue());
57 }
58
SetTextToBreakIterator(JSThread * thread,const JSHandle<JSSegments> & segments,JSHandle<EcmaString> text,icu::BreakIterator * breakIterator)59 void SetTextToBreakIterator(JSThread *thread, const JSHandle<JSSegments> &segments,
60 JSHandle<EcmaString> text, icu::BreakIterator* breakIterator)
61 {
62 std::u16string u16str = EcmaStringAccessor(text).ToU16String();
63 icu::UnicodeString src(u16str.data(), u16str.size());
64 icu::UnicodeString* uText = static_cast<icu::UnicodeString*>(src.clone());
65 breakIterator->setText(*uText);
66 JSSegments::SetUString(thread, segments, uText, JSSegments::FreeUString);
67 }
68
CreateSegmentsObject(JSThread * thread,const JSHandle<JSSegmenter> & segmenter,const JSHandle<EcmaString> & string)69 JSHandle<JSSegments> JSSegments::CreateSegmentsObject(JSThread *thread,
70 const JSHandle<JSSegmenter> &segmenter,
71 const JSHandle<EcmaString> &string)
72 {
73 ObjectFactory *factory = thread->GetEcmaVM()->GetFactory();
74 // 1. Let internalSlotsList be « [[SegmentsSegmenter]], [[SegmentsString]] ».
75 // 2. Let segments be OrdinaryObjectCreate(%SegmentsPrototype%, internalSlotsList).
76 JSHandle<GlobalEnv> env = thread->GetEcmaVM()->GetGlobalEnv();
77 JSHandle<JSFunction> segmentsCtor(env->GetSegmentsFunction());
78 JSHandle<JSSegments> segments(factory->NewJSObjectByConstructor(segmentsCtor));
79 // 3. Set segments.[[SegmentsSegmenter]] to segmenter.
80 icu::BreakIterator* icuBreakIterator = segmenter->GetIcuBreakIterator()->clone();
81 SetIcuBreakIterator(thread, segments, icuBreakIterator, JSSegments::FreeIcuBreakIterator);
82 segments->SetGranularity(segmenter->GetGranularity());
83 // 4. Set segments.[[SegmentsString]] to string.
84 segments->SetSegmentsString(thread, string);
85 SetTextToBreakIterator(thread, segments, string, icuBreakIterator);
86 return segments;
87 }
88
Containing(JSThread * thread,const JSHandle<JSSegments> & segments,double index)89 JSTaggedValue JSSegments::Containing(JSThread *thread, const JSHandle<JSSegments> &segments, double index)
90 {
91 icu::UnicodeString* unicodeString = segments->GetUString();
92 // 5. Let len be the length of string.
93 int32_t len = unicodeString->length();
94 // 7. If n < 0 or n ≥ len, return undefined.
95 if (index < 0 || index >= len) {
96 return JSTaggedValue::Undefined();
97 }
98 int32_t n = static_cast<int32_t>(index);
99 // n may point to the surrogate tail- adjust it back to the lead.
100 n = unicodeString->getChar32Start(n);
101 icu::BreakIterator* breakIterator = segments->GetIcuBreakIterator();
102 // 8. Let startIndex be ! FindBoundary(segmenter, string, n, before).
103 int32_t startIndex = breakIterator->isBoundary(n) ? n : breakIterator->preceding(n);
104 // 9. Let endIndex be ! FindBoundary(segmenter, string, n, after).
105 int32_t endIndex = breakIterator->following(n);
106 // 10. Return ! CreateSegmentDataObject(segmenter, string, startIndex, endIndex).
107 return CreateSegmentDataObject(thread, segments->GetGranularity(), breakIterator,
108 JSHandle<EcmaString>(thread, segments->GetSegmentsString()),
109 *unicodeString, startIndex, endIndex).GetTaggedValue();
110 }
111
CurrentSegmentIsWordLike(icu::BreakIterator * breakIterator)112 bool CurrentSegmentIsWordLike(icu::BreakIterator* breakIterator)
113 {
114 int32_t rule_status = breakIterator->getRuleStatus();
115 return (rule_status >= UBRK_WORD_NUMBER &&
116 rule_status < UBRK_WORD_NUMBER_LIMIT) ||
117 (rule_status >= UBRK_WORD_LETTER &&
118 rule_status < UBRK_WORD_LETTER_LIMIT) ||
119 (rule_status >= UBRK_WORD_KANA &&
120 rule_status < UBRK_WORD_KANA_LIMIT) ||
121 (rule_status >= UBRK_WORD_IDEO && rule_status < UBRK_WORD_IDEO_LIMIT);
122 }
123
124 // 18.7.1 CreateSegmentDataObject ( segmenter, string, startIndex, endIndex )
CreateSegmentDataObject(JSThread * thread,GranularityOption granularity,icu::BreakIterator * breakIterator,const JSHandle<EcmaString> & inputString,const icu::UnicodeString & unicodeString,int32_t startIndex,int32_t endIndex)125 JSHandle<JSObject> JSSegments::CreateSegmentDataObject(JSThread *thread, GranularityOption granularity,
126 icu::BreakIterator* breakIterator, const JSHandle<EcmaString> &inputString,
127 const icu::UnicodeString& unicodeString, int32_t startIndex, int32_t endIndex)
128 {
129 // 1. Let len be the length of string.
130 // 2. Assert: startIndex ≥ 0.
131 ASSERT(startIndex >= 0);
132 // 3. Assert: endIndex ≤ len.
133 ASSERT(endIndex <= unicodeString.length());
134 // 4. Assert: startIndex < endIndex.
135 ASSERT(startIndex < endIndex);
136 // 5. Let result be OrdinaryObjectCreate(%Object.prototype%).
137 auto ecmaVm = thread->GetEcmaVM();
138 JSHandle<GlobalEnv> env = ecmaVm->GetGlobalEnv();
139 ObjectFactory *factory = ecmaVm->GetFactory();
140 JSHandle<JSFunction> ctor(env->GetObjectFunction());
141 JSHandle<JSObject> result(factory->NewJSObjectByConstructor(ctor));
142 // 6. Let segment be the substring of string from startIndex to endIndex.
143 JSHandle<EcmaString> segment =
144 intl::LocaleHelper::UStringToString(thread, unicodeString, startIndex, endIndex);
145 auto globalConst = thread->GlobalConstants();
146 // 7. Perform ! CreateDataPropertyOrThrow(result, "segment", segment).
147 JSHandle<JSTaggedValue> segmentKey = globalConst->GetHandledSegmentString();
148 JSObject::CreateDataPropertyOrThrow(thread, result, segmentKey, JSHandle<JSTaggedValue>::Cast(segment));
149 // 8. Perform ! CreateDataPropertyOrThrow(result, "index", (startIndex)).
150 JSHandle<JSTaggedValue> indexKey = globalConst->GetHandledIndexString();
151 JSObject::CreateDataPropertyOrThrow(thread, result, indexKey, JSHandle<JSTaggedValue>(thread,
152 base::BuiltinsBase::GetTaggedInt(startIndex)));
153 // 9. Perform ! CreateDataPropertyOrThrow(result, "input", string).
154 JSHandle<JSTaggedValue> inputKey = globalConst->GetHandledInputString();
155 JSObject::CreateDataPropertyOrThrow(thread, result, inputKey, JSHandle<JSTaggedValue>::Cast(inputString));
156 // 10. Let granularity be segmenter.[[SegmenterGranularity]].
157 // 11. If granularity is "word", then
158 // a. Let isWordLike be a Boolean value indicating whether the segment in string is "word-like"
159 // according to locale segmenter.[[Locale]].
160 // b. Perform ! CreateDataPropertyOrThrow(result, "isWordLike", isWordLike).
161 if (granularity == GranularityOption::WORD) {
162 bool isWordLike = CurrentSegmentIsWordLike(breakIterator);
163 JSHandle<JSTaggedValue> isWordLikeKey = globalConst->GetHandledIsWordLikeString();
164 JSObject::CreateDataPropertyOrThrow(thread, result, isWordLikeKey, JSHandle<JSTaggedValue>(thread,
165 base::BuiltinsBase::GetTaggedBoolean(isWordLike)));
166 }
167 // 12. Return result.
168 return result;
169 }
170 } // namespace panda::ecmascript
171