1 /**
2 * Copyright (c) 2025 Huawei Device Co., Ltd.
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at
6 *
7 * http://www.apache.org/licenses/LICENSE-2.0
8 *
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
14 */
15
16 #include "IntlLanguageTag.h"
17 #include "libpandabase/macros.h"
18
19 #include <algorithm>
20 #include <locale>
21 #include <sstream>
22 #include <string_view>
23
24 namespace ark::ets::stdlib::intl {
25
// Returns true when every character of `str` is alphabetic in the classic "C" locale.
// An empty view yields true because there is no character that could fail the check.
// CC-OFFNXT(G.NAM.03-CPP) project code style
static bool IsAlpha(std::string_view str)
{
    const std::locale &cLocale = std::locale::classic();
    for (const char symbol : str) {
        if (!std::isalpha(symbol, cLocale)) {
            return false;
        }
    }
    return true;
}
31
// Returns true when `str` consists solely of decimal digits as classified by the classic "C" locale.
// An empty view yields true.
static bool IsNum(std::string_view str)
{
    const auto notDigit = [](auto ch) { return !std::isdigit(ch, std::locale::classic()); };
    return std::none_of(str.begin(), str.end(), notDigit);
}
36
// Returns true when every character of `str` is a letter or a decimal digit in the classic "C" locale.
// An empty view yields true.
static bool IsAlphaNum(std::string_view str)
{
    // The alnum classification is defined as "alpha or digit", so a single lookup per character suffices.
    const auto isLetterOrDigit = [](auto ch) { return std::isalnum(ch, std::locale::classic()); };
    return std::all_of(str.begin(), str.end(), isLetterOrDigit);
}
43
IsVariant(std::string_view subtag)44 static bool IsVariant(std::string_view subtag)
45 {
46 auto size = subtag.size();
47 if (size == INTL_INDEX_FOUR) {
48 return IsNum(subtag.substr(0, 1)) && IsAlphaNum(subtag.substr(1));
49 }
50 if (INTL_INDEX_FOUR + 1 <= size && size <= INTL_INDEX_EIGHT) {
51 return IsAlphaNum(subtag);
52 }
53 return false;
54 }
55
IsRegion(std::string_view subtag)56 static bool IsRegion(std::string_view subtag)
57 {
58 if (subtag.size() == INTL_INDEX_TWO) {
59 return IsAlpha(subtag);
60 }
61 if (subtag.size() == INTL_INDEX_THREE) {
62 return IsNum(subtag);
63 }
64 return false;
65 }
66
IsScript(std::string_view subtag)67 static bool IsScript(std::string_view subtag)
68 {
69 if (subtag.size() == INTL_INDEX_FOUR) {
70 return IsAlpha(subtag);
71 }
72 return false;
73 }
74
IsExtension(std::string_view subtag)75 static bool IsExtension(std::string_view subtag)
76 {
77 if (subtag.size() == 1) {
78 return IsAlphaNum(subtag);
79 }
80 return false;
81 }
82
IsLanguage(std::string_view subtag)83 static bool IsLanguage(std::string_view subtag)
84 {
85 auto size = subtag.size();
86 if ((INTL_INDEX_TWO <= size && size <= INTL_INDEX_THREE) || (INTL_INDEX_FIVE <= size && size <= INTL_INDEX_EIGHT)) {
87 return IsAlpha(subtag);
88 }
89 return false;
90 }
91
IsPrivateSubTag(const std::string & result,size_t & len)92 bool IsPrivateSubTag(const std::string &result, size_t &len)
93 {
94 if ((len > 1) && (result[1] == '-')) {
95 ASSERT(result[0] == 'x' || result[0] == 'i');
96 return true;
97 }
98 return false;
99 }
100
// Non-owning, lazy splitter of a string view on a single delimiter character.
//
// Pieces are obtained by walking an Iterator from Begin() to End(). Notes on the exact behaviour:
//  - an empty input produces no pieces (Begin() == End());
//  - two adjacent delimiters produce an empty piece between them;
//  - a delimiter at the very end of the input does NOT produce a trailing empty piece.
// The viewed characters must outlive the SplitView and all iterators created from it.
class SplitView {
public:
    class Iterator {
    public:
        // Positions the iterator on the piece that starts at offset `pos` of `input`.
        Iterator(std::string_view input, char delimiter, size_t pos)
            : input_(input), delimiter_(delimiter), pos_(pos), next_(input.find(delimiter, pos))
        {
            current_ = Slice();
        }

        // Returns the piece the iterator currently points at.
        std::string_view operator*() const
        {
            return current_;
        }

        // Moves to the piece after the next delimiter, or to the end position when nothing is left.
        Iterator &operator++()
        {
            pos_ = (next_ == std::string_view::npos) ? input_.size() : next_ + 1;
            if (pos_ >= input_.size()) {
                // Reached the end: mirror the state of an iterator constructed at input_.size().
                current_ = std::string_view();
                next_ = std::string_view::npos;
                return *this;
            }
            next_ = input_.find(delimiter_, pos_);
            current_ = Slice();
            return *this;
        }

        // Two iterators are equal when they are at the same offset of inputs with equal contents.
        bool operator==(const Iterator &other) const
        {
            return pos_ == other.pos_ && input_ == other.input_;
        }

        bool operator!=(const Iterator &other) const
        {
            return !(*this == other);
        }

    private:
        // Extracts [pos_, next_) from the input, or everything from pos_ when no delimiter follows.
        std::string_view Slice() const
        {
            const size_t count = (next_ == std::string_view::npos) ? std::string_view::npos : next_ - pos_;
            return input_.substr(pos_, count);
        }

        std::string_view input_;
        char delimiter_;
        size_t pos_;
        size_t next_;
        std::string_view current_;
    };

    SplitView(std::string_view input, char delimiter) : input_(input), delimiter_(delimiter) {}

    // Iterator on the first piece of the input.
    Iterator Begin() const
    {
        return Iterator(input_, delimiter_, 0);
    }

    // Past-the-end iterator, positioned at the end of the input.
    Iterator End() const
    {
        return Iterator(input_, delimiter_, input_.size());
    }

private:
    std::string_view input_;
    char delimiter_;
};
167
IsStructurallyValidLanguageTag(const std::string & tag)168 bool IsStructurallyValidLanguageTag(const std::string &tag)
169 {
170 auto subtags = SplitView(tag, '-');
171 auto it = subtags.Begin();
172 auto end = subtags.End();
173 if (it == end) {
174 return true;
175 }
176 if (!IsLanguage(*it)) {
177 return false;
178 }
179 ++it;
180 if (it == end) {
181 return true;
182 }
183 if (IsExtension(*it)) {
184 return true;
185 }
186 if (IsScript(*it)) {
187 ++it;
188 if (it == end) {
189 return true;
190 }
191 }
192 if (IsRegion(*it)) {
193 ++it;
194 }
195 for (; it != end; ++it) {
196 if (IsExtension(*it)) {
197 return true;
198 }
199 if (!IsVariant(*it)) {
200 return false;
201 }
202 }
203 return true;
204 }
205
ToStdStringLanguageTag(const icu::Locale & locale)206 std::string ToStdStringLanguageTag(const icu::Locale &locale)
207 {
208 UErrorCode status = U_ZERO_ERROR;
209 auto result = locale.toLanguageTag<std::string>(status);
210 if (U_FAILURE(status) != 0) {
211 return "";
212 }
213 size_t findBeginning = result.find("-u-");
214 std::string finalRes;
215 std::string tempRes;
216 if (findBeginning == std::string::npos) {
217 return result;
218 }
219 size_t specialBeginning = findBeginning + INTL_INDEX_THREE;
220 size_t specialCount = 0;
221 while ((specialBeginning < result.size()) && (result[specialBeginning] != '-')) {
222 specialCount++;
223 specialBeginning++;
224 }
225 if (findBeginning != std::string::npos) {
226 // It begin with "-u-xx" or with more elements.
227 tempRes = result.substr(0, findBeginning + INTL_INDEX_THREE + specialCount);
228 if (result.size() <= findBeginning + INTL_INDEX_THREE + specialCount) {
229 return result;
230 }
231 std::string leftStr = result.substr(findBeginning + INTL_INDEX_THREE + specialCount + 1);
232 std::istringstream temp(leftStr);
233 std::string buffer;
234 std::vector<std::string> resContainer;
235 while (getline(temp, buffer, '-')) {
236 if (buffer != "true" && buffer != "yes") {
237 resContainer.push_back(buffer);
238 }
239 }
240 for (auto &it : resContainer) {
241 std::string tag = "-";
242 tag += it;
243 finalRes += tag;
244 }
245 }
246 if (!finalRes.empty()) {
247 tempRes += finalRes;
248 }
249 result = tempRes;
250 return result;
251 }
252
253 } // namespace ark::ets::stdlib::intl