• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (c) 2023 Shenzhen Kaihong Digital Industry Development Co., Ltd.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at
6  *
7  *     http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 
16 #include "ecmascript/js_segments.h"
17 
18 
19 #include "ecmascript/base/builtins_base.h"
20 #include "ecmascript/intl/locale_helper.h"
21 #include "ecmascript/object_factory-inl.h"
22 
23 namespace panda::ecmascript {
24 
SetIcuBreakIterator(JSThread * thread,const JSHandle<JSSegments> & segments,icu::BreakIterator * icuBreakIterator,const NativePointerCallback & callback)25 void JSSegments::SetIcuBreakIterator(JSThread *thread, const JSHandle<JSSegments> &segments,
26                                      icu::BreakIterator* icuBreakIterator, const NativePointerCallback &callback)
27 {
28     EcmaVM *ecmaVm = thread->GetEcmaVM();
29     ObjectFactory *factory = ecmaVm->GetFactory();
30 
31     ASSERT(icuBreakIterator != nullptr);
32     JSTaggedValue data = segments->GetIcuField();
33     if (data.IsJSNativePointer()) {
34         JSNativePointer *native = JSNativePointer::Cast(data.GetTaggedObject());
35         native->ResetExternalPointer(thread, icuBreakIterator);
36         return;
37     }
38     JSHandle<JSNativePointer> pointer = factory->NewJSNativePointer(icuBreakIterator, callback);
39     segments->SetIcuField(thread, pointer.GetTaggedValue());
40 }
41 
SetUString(JSThread * thread,const JSHandle<JSSegments> & segments,icu::UnicodeString * icuUnicodeString,const NativePointerCallback & callback)42 void JSSegments::SetUString(JSThread *thread, const JSHandle<JSSegments> &segments,
43                             icu::UnicodeString* icuUnicodeString, const NativePointerCallback &callback)
44 {
45     EcmaVM *ecmaVm = thread->GetEcmaVM();
46     ObjectFactory *factory = ecmaVm->GetFactory();
47 
48     ASSERT(icuUnicodeString != nullptr);
49     JSTaggedValue data = segments->GetUnicodeString();
50     if (data.IsJSNativePointer()) {
51         JSNativePointer *native = JSNativePointer::Cast(data.GetTaggedObject());
52         native->ResetExternalPointer(thread, icuUnicodeString);
53         return;
54     }
55     JSHandle<JSNativePointer> pointer = factory->NewJSNativePointer(icuUnicodeString, callback);
56     segments->SetUnicodeString(thread, pointer.GetTaggedValue());
57 }
58 
SetTextToBreakIterator(JSThread * thread,const JSHandle<JSSegments> & segments,JSHandle<EcmaString> text,icu::BreakIterator * breakIterator)59 void SetTextToBreakIterator(JSThread *thread, const JSHandle<JSSegments> &segments,
60                             JSHandle<EcmaString> text, icu::BreakIterator* breakIterator)
61 {
62     std::u16string u16str = EcmaStringAccessor(text).ToU16String();
63     icu::UnicodeString src(u16str.data(), u16str.size());
64     icu::UnicodeString* uText = static_cast<icu::UnicodeString*>(src.clone());
65     breakIterator->setText(*uText);
66     JSSegments::SetUString(thread, segments, uText, JSSegments::FreeUString);
67 }
68 
CreateSegmentsObject(JSThread * thread,const JSHandle<JSSegmenter> & segmenter,const JSHandle<EcmaString> & string)69 JSHandle<JSSegments> JSSegments::CreateSegmentsObject(JSThread *thread,
70                                                       const JSHandle<JSSegmenter> &segmenter,
71                                                       const JSHandle<EcmaString> &string)
72 {
73     ObjectFactory *factory = thread->GetEcmaVM()->GetFactory();
74     // 1. Let internalSlotsList be « [[SegmentsSegmenter]], [[SegmentsString]] ».
75     // 2. Let segments be OrdinaryObjectCreate(%SegmentsPrototype%, internalSlotsList).
76     JSHandle<GlobalEnv> env = thread->GetEcmaVM()->GetGlobalEnv();
77     JSHandle<JSFunction> segmentsCtor(env->GetSegmentsFunction());
78     JSHandle<JSSegments> segments(factory->NewJSObjectByConstructor(segmentsCtor));
79     // 3. Set segments.[[SegmentsSegmenter]] to segmenter.
80     icu::BreakIterator* icuBreakIterator = segmenter->GetIcuBreakIterator()->clone();
81     SetIcuBreakIterator(thread, segments, icuBreakIterator, JSSegments::FreeIcuBreakIterator);
82     segments->SetGranularity(segmenter->GetGranularity());
83     // 4. Set segments.[[SegmentsString]] to string.
84     segments->SetSegmentsString(thread, string);
85     SetTextToBreakIterator(thread, segments, string, icuBreakIterator);
86     return segments;
87 }
88 
Containing(JSThread * thread,const JSHandle<JSSegments> & segments,double index)89 JSTaggedValue JSSegments::Containing(JSThread *thread, const JSHandle<JSSegments> &segments, double index)
90 {
91     icu::UnicodeString* unicodeString = segments->GetUString();
92     // 5. Let len be the length of string.
93     int32_t len = unicodeString->length();
94     // 7. If n < 0 or n ≥ len, return undefined.
95     if (index < 0 || index >= len) {
96         return JSTaggedValue::Undefined();
97     }
98     int32_t n = static_cast<int32_t>(index);
99     // n may point to the surrogate tail- adjust it back to the lead.
100     n = unicodeString->getChar32Start(n);
101     icu::BreakIterator* breakIterator = segments->GetIcuBreakIterator();
102     // 8. Let startIndex be ! FindBoundary(segmenter, string, n, before).
103     int32_t startIndex = breakIterator->isBoundary(n) ? n : breakIterator->preceding(n);
104     // 9. Let endIndex be ! FindBoundary(segmenter, string, n, after).
105     int32_t endIndex = breakIterator->following(n);
106     // 10. Return ! CreateSegmentDataObject(segmenter, string, startIndex, endIndex).
107     return CreateSegmentDataObject(thread, segments->GetGranularity(), breakIterator,
108                                    JSHandle<EcmaString>(thread, segments->GetSegmentsString()),
109                                    *unicodeString, startIndex, endIndex).GetTaggedValue();
110 }
111 
CurrentSegmentIsWordLike(icu::BreakIterator * breakIterator)112 bool CurrentSegmentIsWordLike(icu::BreakIterator* breakIterator)
113 {
114     int32_t rule_status = breakIterator->getRuleStatus();
115     return (rule_status >= UBRK_WORD_NUMBER &&
116             rule_status < UBRK_WORD_NUMBER_LIMIT) ||
117            (rule_status >= UBRK_WORD_LETTER &&
118             rule_status < UBRK_WORD_LETTER_LIMIT) ||
119            (rule_status >= UBRK_WORD_KANA &&
120             rule_status < UBRK_WORD_KANA_LIMIT) ||
121            (rule_status >= UBRK_WORD_IDEO && rule_status < UBRK_WORD_IDEO_LIMIT);
122 }
123 
124 // 18.7.1 CreateSegmentDataObject ( segmenter, string, startIndex, endIndex )
CreateSegmentDataObject(JSThread * thread,GranularityOption granularity,icu::BreakIterator * breakIterator,const JSHandle<EcmaString> & inputString,const icu::UnicodeString & unicodeString,int32_t startIndex,int32_t endIndex)125 JSHandle<JSObject> JSSegments::CreateSegmentDataObject(JSThread *thread, GranularityOption granularity,
126     icu::BreakIterator* breakIterator, const JSHandle<EcmaString> &inputString,
127     const icu::UnicodeString& unicodeString, int32_t startIndex, int32_t endIndex)
128 {
129     // 1. Let len be the length of string.
130     // 2. Assert: startIndex ≥ 0.
131     ASSERT(startIndex >= 0);
132     // 3. Assert: endIndex ≤ len.
133     ASSERT(endIndex <= unicodeString.length());
134     // 4. Assert: startIndex < endIndex.
135     ASSERT(startIndex < endIndex);
136     // 5. Let result be OrdinaryObjectCreate(%Object.prototype%).
137     auto ecmaVm = thread->GetEcmaVM();
138     JSHandle<GlobalEnv> env = ecmaVm->GetGlobalEnv();
139     ObjectFactory *factory = ecmaVm->GetFactory();
140     JSHandle<JSFunction> ctor(env->GetObjectFunction());
141     JSHandle<JSObject> result(factory->NewJSObjectByConstructor(ctor));
142     // 6. Let segment be the substring of string from startIndex to endIndex.
143     JSHandle<EcmaString> segment =
144         intl::LocaleHelper::UStringToString(thread, unicodeString, startIndex, endIndex);
145     auto globalConst = thread->GlobalConstants();
146     // 7. Perform ! CreateDataPropertyOrThrow(result, "segment", segment).
147     JSHandle<JSTaggedValue> segmentKey = globalConst->GetHandledSegmentString();
148     JSObject::CreateDataPropertyOrThrow(thread, result, segmentKey, JSHandle<JSTaggedValue>::Cast(segment));
149     // 8. Perform ! CreateDataPropertyOrThrow(result, "index", ��(startIndex)).
150     JSHandle<JSTaggedValue> indexKey = globalConst->GetHandledIndexString();
151     JSObject::CreateDataPropertyOrThrow(thread, result, indexKey, JSHandle<JSTaggedValue>(thread,
152                                         base::BuiltinsBase::GetTaggedInt(startIndex)));
153     // 9. Perform ! CreateDataPropertyOrThrow(result, "input", string).
154     JSHandle<JSTaggedValue> inputKey = globalConst->GetHandledInputString();
155     JSObject::CreateDataPropertyOrThrow(thread, result, inputKey, JSHandle<JSTaggedValue>::Cast(inputString));
156     // 10. Let granularity be segmenter.[[SegmenterGranularity]].
157     // 11. If granularity is "word", then
158     //     a. Let isWordLike be a Boolean value indicating whether the segment in string is "word-like"
159     //        according to locale segmenter.[[Locale]].
160     //     b. Perform ! CreateDataPropertyOrThrow(result, "isWordLike", isWordLike).
161     if (granularity == GranularityOption::WORD) {
162         bool isWordLike = CurrentSegmentIsWordLike(breakIterator);
163         JSHandle<JSTaggedValue> isWordLikeKey = globalConst->GetHandledIsWordLikeString();
164         JSObject::CreateDataPropertyOrThrow(thread, result, isWordLikeKey, JSHandle<JSTaggedValue>(thread,
165                                             base::BuiltinsBase::GetTaggedBoolean(isWordLike)));
166     }
167     // 12. Return result.
168     return result;
169 }
170 }  // namespace panda::ecmascript
171