• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (c) 2023 Shenzhen Kaihong Digital Industry Development Co., Ltd.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at
6  *
7  *     http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 
16 #include "ecmascript/js_segments.h"
17 
18 
19 #include "ecmascript/base/builtins_base.h"
20 #include "ecmascript/intl/locale_helper.h"
21 #include "ecmascript/object_factory-inl.h"
22 
23 namespace panda::ecmascript {
24 
SetIcuBreakIterator(JSThread * thread,const JSHandle<JSSegments> & segments,icu::BreakIterator * icuBreakIterator,const NativePointerCallback & callback)25 void JSSegments::SetIcuBreakIterator(JSThread *thread, const JSHandle<JSSegments> &segments,
26                                      icu::BreakIterator* icuBreakIterator, const NativePointerCallback &callback)
27 {
28     EcmaVM *ecmaVm = thread->GetEcmaVM();
29     ObjectFactory *factory = ecmaVm->GetFactory();
30 
31     ASSERT(icuBreakIterator != nullptr);
32     JSTaggedValue data = segments->GetIcuField(thread);
33     if (data.IsJSNativePointer()) {
34         JSNativePointer *native = JSNativePointer::Cast(data.GetTaggedObject());
35         native->ResetExternalPointer(thread, icuBreakIterator);
36         return;
37     }
38     JSHandle<JSNativePointer> pointer = factory->NewJSNativePointer(icuBreakIterator, callback);
39     segments->SetIcuField(thread, pointer.GetTaggedValue());
40 }
41 
SetUString(JSThread * thread,const JSHandle<JSSegments> & segments,icu::UnicodeString * icuUnicodeString,const NativePointerCallback & callback)42 void JSSegments::SetUString(JSThread *thread, const JSHandle<JSSegments> &segments,
43                             icu::UnicodeString* icuUnicodeString, const NativePointerCallback &callback)
44 {
45     EcmaVM *ecmaVm = thread->GetEcmaVM();
46     ObjectFactory *factory = ecmaVm->GetFactory();
47 
48     ASSERT(icuUnicodeString != nullptr);
49     JSTaggedValue data = segments->GetUnicodeString(thread);
50     if (data.IsJSNativePointer()) {
51         JSNativePointer *native = JSNativePointer::Cast(data.GetTaggedObject());
52         native->ResetExternalPointer(thread, icuUnicodeString);
53         return;
54     }
55     JSHandle<JSNativePointer> pointer = factory->NewJSNativePointer(icuUnicodeString, callback);
56     segments->SetUnicodeString(thread, pointer.GetTaggedValue());
57 }
58 
SetTextToBreakIterator(JSThread * thread,const JSHandle<JSSegments> & segments,JSHandle<EcmaString> text,icu::BreakIterator * breakIterator)59 void SetTextToBreakIterator(JSThread *thread, const JSHandle<JSSegments> &segments,
60                             JSHandle<EcmaString> text, icu::BreakIterator* breakIterator)
61 {
62     std::u16string u16str = EcmaStringAccessor(text).ToU16String(thread);
63     icu::UnicodeString src(u16str.data(), u16str.size());
64     icu::UnicodeString* uText = static_cast<icu::UnicodeString*>(src.clone());
65     breakIterator->setText(*uText);
66     JSSegments::SetUString(thread, segments, uText, JSSegments::FreeUString);
67 }
68 
CreateSegmentsObject(JSThread * thread,const JSHandle<JSSegmenter> & segmenter,const JSHandle<EcmaString> & string)69 JSHandle<JSSegments> JSSegments::CreateSegmentsObject(JSThread *thread,
70                                                       const JSHandle<JSSegmenter> &segmenter,
71                                                       const JSHandle<EcmaString> &string)
72 {
73     ObjectFactory *factory = thread->GetEcmaVM()->GetFactory();
74     // 1. Let internalSlotsList be « [[SegmentsSegmenter]], [[SegmentsString]] ».
75     // 2. Let segments be OrdinaryObjectCreate(%SegmentsPrototype%, internalSlotsList).
76     JSHandle<GlobalEnv> env = thread->GetEcmaVM()->GetGlobalEnv();
77     JSHandle<JSFunction> segmentsCtor(env->GetSegmentsFunction());
78     JSHandle<JSSegments> segments(factory->NewJSObjectByConstructor(segmentsCtor));
79     // 3. Set segments.[[SegmentsSegmenter]] to segmenter.
80     icu::BreakIterator* icuBreakIterator = segmenter->GetIcuBreakIterator(thread)->clone();
81     SetIcuBreakIterator(thread, segments, icuBreakIterator, JSSegments::FreeIcuBreakIterator);
82     segments->SetGranularity(segmenter->GetGranularity());
83     // 4. Set segments.[[SegmentsString]] to string.
84     segments->SetSegmentsString(thread, string);
85     SetTextToBreakIterator(thread, segments, string, icuBreakIterator);
86     return segments;
87 }
88 
Containing(JSThread * thread,const JSHandle<JSSegments> & segments,double index)89 JSTaggedValue JSSegments::Containing(JSThread *thread, const JSHandle<JSSegments> &segments, double index)
90 {
91     icu::UnicodeString* unicodeString = segments->GetUString(thread);
92     // 5. Let len be the length of string.
93     int32_t len = unicodeString->length();
94     // 7. If n < 0 or n ≥ len, return undefined.
95     if (index < 0 || index >= len) {
96         return JSTaggedValue::Undefined();
97     }
98     int32_t n = static_cast<int32_t>(index);
99     // n may point to the surrogate tail- adjust it back to the lead.
100     n = unicodeString->getChar32Start(n);
101     icu::BreakIterator* breakIterator = segments->GetIcuBreakIterator(thread);
102     // 8. Let startIndex be ! FindBoundary(segmenter, string, n, before).
103     int32_t startIndex = breakIterator->isBoundary(n) ? n : breakIterator->preceding(n);
104     // 9. Let endIndex be ! FindBoundary(segmenter, string, n, after).
105     int32_t endIndex = breakIterator->following(n);
106     // 10. Return ! CreateSegmentDataObject(segmenter, string, startIndex, endIndex).
107     return CreateSegmentDataObject(thread, segments->GetGranularity(), breakIterator,
108                                    JSHandle<EcmaString>(thread, segments->GetSegmentsString(thread)),
109                                    *unicodeString, startIndex, endIndex).GetTaggedValue();
110 }
111 
CurrentSegmentIsWordLike(icu::BreakIterator * textBreakIterator)112 bool CurrentSegmentIsWordLike(icu::BreakIterator* textBreakIterator)
113 {
114     int32_t currentSegmentRuleStatus = textBreakIterator->getRuleStatus();
115     return (currentSegmentRuleStatus >= UBRK_WORD_NUMBER &&
116             currentSegmentRuleStatus < UBRK_WORD_NUMBER_LIMIT) ||
117            (currentSegmentRuleStatus >= UBRK_WORD_LETTER &&
118             currentSegmentRuleStatus < UBRK_WORD_LETTER_LIMIT) ||
119            (currentSegmentRuleStatus >= UBRK_WORD_KANA &&
120             currentSegmentRuleStatus < UBRK_WORD_KANA_LIMIT) ||
121            (currentSegmentRuleStatus >= UBRK_WORD_IDEO &&
122             currentSegmentRuleStatus < UBRK_WORD_IDEO_LIMIT);
123 }
124 
125 // 18.7.1 CreateSegmentDataObject ( segmenter, string, startIndex, endIndex )
CreateSegmentDataObject(JSThread * thread,GranularityOption granularity,icu::BreakIterator * breakIterator,const JSHandle<EcmaString> & inputString,const icu::UnicodeString & unicodeString,int32_t startIndex,int32_t endIndex)126 JSHandle<JSObject> JSSegments::CreateSegmentDataObject(JSThread *thread, GranularityOption granularity,
127     icu::BreakIterator* breakIterator, const JSHandle<EcmaString> &inputString,
128     const icu::UnicodeString& unicodeString, int32_t startIndex, int32_t endIndex)
129 {
130     // 1. Let len be the length of string.
131     // 2. Assert: startIndex ≥ 0.
132     ASSERT(startIndex >= 0);
133     // 3. Assert: endIndex ≤ len.
134     ASSERT(endIndex <= unicodeString.length());
135     // 4. Assert: startIndex < endIndex.
136     ASSERT(startIndex < endIndex);
137     // 5. Let result be OrdinaryObjectCreate(%Object.prototype%).
138     auto ecmaVm = thread->GetEcmaVM();
139     JSHandle<GlobalEnv> env = ecmaVm->GetGlobalEnv();
140     ObjectFactory *factory = ecmaVm->GetFactory();
141     JSHandle<JSFunction> ctor(env->GetObjectFunction());
142     JSHandle<JSObject> result(factory->NewJSObjectByConstructor(ctor));
143     // 6. Let segment be the substring of string from startIndex to endIndex.
144     JSHandle<EcmaString> segment =
145         intl::LocaleHelper::UStringToString(thread, unicodeString, startIndex, endIndex);
146     auto globalConst = thread->GlobalConstants();
147     // 7. Perform ! CreateDataPropertyOrThrow(result, "segment", segment).
148     JSHandle<JSTaggedValue> segmentKey = globalConst->GetHandledSegmentString();
149     JSObject::CreateDataPropertyOrThrow(thread, result, segmentKey, JSHandle<JSTaggedValue>::Cast(segment));
150     // 8. Perform ! CreateDataPropertyOrThrow(result, "index", ��(startIndex)).
151     JSHandle<JSTaggedValue> indexKey = globalConst->GetHandledIndexString();
152     JSObject::CreateDataPropertyOrThrow(thread, result, indexKey, JSHandle<JSTaggedValue>(thread,
153                                         base::BuiltinsBase::GetTaggedInt(startIndex)));
154     // 9. Perform ! CreateDataPropertyOrThrow(result, "input", string).
155     JSHandle<JSTaggedValue> inputKey = globalConst->GetHandledInputString();
156     JSObject::CreateDataPropertyOrThrow(thread, result, inputKey, JSHandle<JSTaggedValue>::Cast(inputString));
157     // 10. Let granularity be segmenter.[[SegmenterGranularity]].
158     // 11. If granularity is "word", then
159     //     a. Let isWordLike be a Boolean value indicating whether the segment in string is "word-like"
160     //        according to locale segmenter.[[Locale]].
161     //     b. Perform ! CreateDataPropertyOrThrow(result, "isWordLike", isWordLike).
162     if (granularity == GranularityOption::WORD) {
163         bool isWordLike = CurrentSegmentIsWordLike(breakIterator);
164         JSHandle<JSTaggedValue> isWordLikeKey = globalConst->GetHandledIsWordLikeString();
165         JSObject::CreateDataPropertyOrThrow(thread, result, isWordLikeKey, JSHandle<JSTaggedValue>(thread,
166                                             base::BuiltinsBase::GetTaggedBoolean(isWordLike)));
167     }
168     // 12. Return result.
169     return result;
170 }
171 }  // namespace panda::ecmascript
172