1 // Copyright 2013 the V8 project authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 #ifndef V8_INTL_SUPPORT 6 #error Internationalization is expected to be enabled. 7 #endif // V8_INTL_SUPPORT 8 9 #ifndef V8_OBJECTS_INTL_OBJECTS_H_ 10 #define V8_OBJECTS_INTL_OBJECTS_H_ 11 12 #include <map> 13 #include <memory> 14 #include <set> 15 #include <string> 16 17 #include "src/base/timezone-cache.h" 18 #include "src/objects/contexts.h" 19 #include "src/objects/managed.h" 20 #include "src/objects/objects.h" 21 #include "unicode/locid.h" 22 #include "unicode/uversion.h" 23 24 #define V8_MINIMUM_ICU_VERSION 69 25 26 namespace U_ICU_NAMESPACE { 27 class BreakIterator; 28 class Collator; 29 class FormattedValue; 30 class StringEnumeration; 31 class TimeZone; 32 class UnicodeString; 33 } // namespace U_ICU_NAMESPACE 34 35 namespace v8 { 36 namespace internal { 37 38 struct NumberFormatSpan { 39 int32_t field_id; 40 int32_t begin_pos; 41 int32_t end_pos; 42 43 NumberFormatSpan() = default; NumberFormatSpanNumberFormatSpan44 NumberFormatSpan(int32_t field_id, int32_t begin_pos, int32_t end_pos) 45 : field_id(field_id), begin_pos(begin_pos), end_pos(end_pos) {} 46 }; 47 48 V8_EXPORT_PRIVATE std::vector<NumberFormatSpan> FlattenRegionsToParts( 49 std::vector<NumberFormatSpan>* regions); 50 51 template <typename T> 52 class Handle; 53 class JSCollator; 54 55 class Intl { 56 public: 57 enum class BoundFunctionContextSlot { 58 kBoundFunction = Context::MIN_CONTEXT_SLOTS, 59 kLength 60 }; 61 62 enum class FormatRangeSource { kShared, kStartRange, kEndRange }; 63 64 class FormatRangeSourceTracker { 65 public: 66 FormatRangeSourceTracker(); 67 void Add(int32_t field, int32_t start, int32_t limit); 68 FormatRangeSource GetSource(int32_t start, int32_t limit) const; 69 70 private: 71 int32_t start_[2]; 72 int32_t limit_[2]; 73 74 bool FieldContains(int32_t field, int32_t start, int32_t limit) const; 75 }; 76 77 static Handle<String> SourceString(Isolate* isolate, 78 FormatRangeSource source); 79 80 // Build a set of ICU locales from a list of Locales. If there is a locale 81 // with a script tag then the locales also include a locale without the 82 // script; eg, pa_Guru_IN (language=Panjabi, script=Gurmukhi, country-India) 83 // would include pa_IN. 84 static std::set<std::string> BuildLocaleSet( 85 const std::vector<std::string>& locales, const char* path, 86 const char* validate_key); 87 88 static Maybe<std::string> ToLanguageTag(const icu::Locale& locale); 89 90 // Get the name of the numbering system from locale. 91 // ICU doesn't expose numbering system in any way, so we have to assume that 92 // for given locale NumberingSystem constructor produces the same digits as 93 // NumberFormat/Calendar would. 94 static std::string GetNumberingSystem(const icu::Locale& icu_locale); 95 96 static V8_WARN_UNUSED_RESULT MaybeHandle<JSObject> SupportedLocalesOf( 97 Isolate* isolate, const char* method_name, 98 const std::set<std::string>& available_locales, Handle<Object> locales_in, 99 Handle<Object> options_in); 100 101 // https://tc39.github.io/ecma402/#sec-canonicalizelocalelist 102 // {only_return_one_result} is an optimization for callers that only 103 // care about the first result. 104 static Maybe<std::vector<std::string>> CanonicalizeLocaleList( 105 Isolate* isolate, Handle<Object> locales, 106 bool only_return_one_result = false); 107 108 // ecma-402 #sec-intl.getcanonicallocales 109 V8_WARN_UNUSED_RESULT static MaybeHandle<JSArray> GetCanonicalLocales( 110 Isolate* isolate, Handle<Object> locales); 111 112 // ecma-402 #sec-intl.supportedvaluesof 113 V8_WARN_UNUSED_RESULT static MaybeHandle<JSArray> SupportedValuesOf( 114 Isolate* isolate, Handle<Object> key); 115 116 // For locale sensitive functions 117 V8_WARN_UNUSED_RESULT static MaybeHandle<String> StringLocaleConvertCase( 118 Isolate* isolate, Handle<String> s, bool is_upper, 119 Handle<Object> locales); 120 121 V8_WARN_UNUSED_RESULT static MaybeHandle<String> ConvertToUpper( 122 Isolate* isolate, Handle<String> s); 123 124 V8_WARN_UNUSED_RESULT static MaybeHandle<String> ConvertToLower( 125 Isolate* isolate, Handle<String> s); 126 127 V8_WARN_UNUSED_RESULT static base::Optional<int> StringLocaleCompare( 128 Isolate* isolate, Handle<String> s1, Handle<String> s2, 129 Handle<Object> locales, Handle<Object> options, const char* method_name); 130 131 enum class CompareStringsOptions { 132 kNone, 133 kTryFastPath, 134 }; 135 template <class IsolateT> 136 V8_EXPORT_PRIVATE static CompareStringsOptions CompareStringsOptionsFor( 137 IsolateT* isolate, Handle<Object> locales, Handle<Object> options); 138 V8_EXPORT_PRIVATE V8_WARN_UNUSED_RESULT static int CompareStrings( 139 Isolate* isolate, const icu::Collator& collator, Handle<String> s1, 140 Handle<String> s2, 141 CompareStringsOptions compare_strings_options = 142 CompareStringsOptions::kNone); 143 144 // ecma402/#sup-properties-of-the-number-prototype-object 145 V8_WARN_UNUSED_RESULT static MaybeHandle<String> NumberToLocaleString( 146 Isolate* isolate, Handle<Object> num, Handle<Object> locales, 147 Handle<Object> options, const char* method_name); 148 149 // [[RoundingPriority]] is one of the String values "auto", "morePrecision", 150 // or "lessPrecision", specifying the rounding priority for the number. 151 enum class RoundingPriority { 152 kAuto, 153 kMorePrecision, 154 kLessPrecision, 155 }; 156 157 enum class RoundingType { 158 kFractionDigits, 159 kSignificantDigits, 160 kMorePrecision, 161 kLessPrecision, 162 }; 163 164 // ecma402/#sec-setnfdigitoptions 165 struct NumberFormatDigitOptions { 166 int minimum_integer_digits; 167 int minimum_fraction_digits; 168 int maximum_fraction_digits; 169 int minimum_significant_digits; 170 int maximum_significant_digits; 171 RoundingPriority rounding_priority; 172 RoundingType rounding_type; 173 }; 174 V8_WARN_UNUSED_RESULT static Maybe<NumberFormatDigitOptions> 175 SetNumberFormatDigitOptions(Isolate* isolate, Handle<JSReceiver> options, 176 int mnfd_default, int mxfd_default, 177 bool notation_is_compact); 178 179 // Helper function to convert a UnicodeString to a Handle<String> 180 V8_WARN_UNUSED_RESULT static MaybeHandle<String> ToString( 181 Isolate* isolate, const icu::UnicodeString& string); 182 183 // Helper function to convert a substring of UnicodeString to a Handle<String> 184 V8_WARN_UNUSED_RESULT static MaybeHandle<String> ToString( 185 Isolate* isolate, const icu::UnicodeString& string, int32_t begin, 186 int32_t end); 187 188 // Helper function to convert a FormattedValue to String 189 V8_WARN_UNUSED_RESULT static MaybeHandle<String> FormattedToString( 190 Isolate* isolate, const icu::FormattedValue& formatted); 191 192 // Helper function to convert number field id to type string. 193 static Handle<String> NumberFieldToType(Isolate* isolate, 194 const NumberFormatSpan& part, 195 const icu::UnicodeString& text, 196 bool is_nan); 197 198 // A helper function to implement formatToParts which add element to array as 199 // $array[$index] = { type: $field_type_string, value: $value } 200 static void AddElement(Isolate* isolate, Handle<JSArray> array, int index, 201 Handle<String> field_type_string, 202 Handle<String> value); 203 204 // A helper function to implement formatToParts which add element to array as 205 // $array[$index] = { 206 // type: $field_type_string, value: $value, 207 // $additional_property_name: $additional_property_value 208 // } 209 static void AddElement(Isolate* isolate, Handle<JSArray> array, int index, 210 Handle<String> field_type_string, Handle<String> value, 211 Handle<String> additional_property_name, 212 Handle<String> additional_property_value); 213 214 // In ECMA 402 v1, Intl constructors supported a mode of operation 215 // where calling them with an existing object as a receiver would 216 // transform the receiver into the relevant Intl instance with all 217 // internal slots. In ECMA 402 v2, this capability was removed, to 218 // avoid adding internal slots on existing objects. In ECMA 402 v3, 219 // the capability was re-added as "normative optional" in a mode 220 // which chains the underlying Intl instance on any object, when the 221 // constructor is called 222 // 223 // See ecma402/#legacy-constructor. 224 V8_WARN_UNUSED_RESULT static MaybeHandle<Object> LegacyUnwrapReceiver( 225 Isolate* isolate, Handle<JSReceiver> receiver, 226 Handle<JSFunction> constructor, bool has_initialized_slot); 227 228 // enum for "localeMatcher" option: shared by many Intl objects. 229 enum class MatcherOption { kBestFit, kLookup }; 230 231 // Shared function to read the "localeMatcher" option. 232 V8_WARN_UNUSED_RESULT static Maybe<MatcherOption> GetLocaleMatcher( 233 Isolate* isolate, Handle<JSReceiver> options, const char* method_name); 234 235 // Shared function to read the "numberingSystem" option. 236 V8_WARN_UNUSED_RESULT static Maybe<bool> GetNumberingSystem( 237 Isolate* isolate, Handle<JSReceiver> options, const char* method_name, 238 std::unique_ptr<char[]>* result); 239 240 // Check the calendar is valid or not for that locale. 241 static bool IsValidCalendar(const icu::Locale& locale, 242 const std::string& value); 243 244 // Check the collation is valid or not for that locale. 245 static bool IsValidCollation(const icu::Locale& locale, 246 const std::string& value); 247 248 // Check the numberingSystem is valid. 249 static bool IsValidNumberingSystem(const std::string& value); 250 251 // Check the calendar is well formed. 252 static bool IsWellFormedCalendar(const std::string& value); 253 254 // Check the currency is well formed. 255 static bool IsWellFormedCurrency(const std::string& value); 256 257 struct ResolvedLocale { 258 std::string locale; 259 icu::Locale icu_locale; 260 std::map<std::string, std::string> extensions; 261 }; 262 263 static Maybe<ResolvedLocale> ResolveLocale( 264 Isolate* isolate, const std::set<std::string>& available_locales, 265 const std::vector<std::string>& requested_locales, MatcherOption options, 266 const std::set<std::string>& relevant_extension_keys); 267 268 // A helper template to implement the GetAvailableLocales 269 // Usage in src/objects/js-XXX.cc 270 // const std::set<std::string>& JSXxx::GetAvailableLocales() { 271 // static base::LazyInstance<Intl::AvailableLocales<icu::YYY>>::type 272 // available_locales = LAZY_INSTANCE_INITIALIZER; 273 // return available_locales.Pointer()->Get(); 274 // } 275 276 struct SkipResourceCheck { keySkipResourceCheck277 static const char* key() { return nullptr; } pathSkipResourceCheck278 static const char* path() { return nullptr; } 279 }; 280 281 template <typename C = SkipResourceCheck> 282 class AvailableLocales { 283 public: AvailableLocales()284 AvailableLocales() { 285 UErrorCode status = U_ZERO_ERROR; 286 UEnumeration* uenum = 287 uloc_openAvailableByType(ULOC_AVAILABLE_WITH_LEGACY_ALIASES, &status); 288 DCHECK(U_SUCCESS(status)); 289 290 std::vector<std::string> all_locales; 291 const char* loc; 292 while ((loc = uenum_next(uenum, nullptr, &status)) != nullptr) { 293 DCHECK(U_SUCCESS(status)); 294 std::string locstr(loc); 295 std::replace(locstr.begin(), locstr.end(), '_', '-'); 296 // Handle special case 297 if (locstr == "en-US-POSIX") locstr = "en-US-u-va-posix"; 298 all_locales.push_back(locstr); 299 } 300 uenum_close(uenum); 301 302 set_ = Intl::BuildLocaleSet(all_locales, C::path(), C::key()); 303 } Get()304 const std::set<std::string>& Get() const { return set_; } 305 306 private: 307 std::set<std::string> set_; 308 }; 309 310 // Utility function to set text to BreakIterator. 311 static Handle<Managed<icu::UnicodeString>> SetTextToBreakIterator( 312 Isolate* isolate, Handle<String> text, 313 icu::BreakIterator* break_iterator); 314 315 // ecma262 #sec-string.prototype.normalize 316 V8_WARN_UNUSED_RESULT static MaybeHandle<String> Normalize( 317 Isolate* isolate, Handle<String> string, Handle<Object> form_input); 318 static base::TimezoneCache* CreateTimeZoneCache(); 319 320 // Convert a Handle<String> to icu::UnicodeString 321 static icu::UnicodeString ToICUUnicodeString(Isolate* isolate, 322 Handle<String> string, 323 int offset = 0); 324 325 static const uint8_t* ToLatin1LowerTable(); 326 327 static const uint8_t* AsciiCollationWeightsL1(); 328 static const uint8_t* AsciiCollationWeightsL3(); 329 static const int kAsciiCollationWeightsLength; 330 331 static String ConvertOneByteToLower(String src, String dst); 332 333 static const std::set<std::string>& GetAvailableLocales(); 334 335 static const std::set<std::string>& GetAvailableLocalesForDateFormat(); 336 337 V8_WARN_UNUSED_RESULT static MaybeHandle<JSArray> ToJSArray( 338 Isolate* isolate, const char* unicode_key, 339 icu::StringEnumeration* enumeration, 340 const std::function<bool(const char*)>& removes, bool sort); 341 342 static bool RemoveCollation(const char* collation); 343 344 static std::set<std::string> SanctionedSimpleUnits(); 345 346 V8_WARN_UNUSED_RESULT static MaybeHandle<JSArray> AvailableCalendars( 347 Isolate* isolate); 348 349 V8_WARN_UNUSED_RESULT static bool IsValidTimeZoneName( 350 const icu::TimeZone& tz); 351 V8_WARN_UNUSED_RESULT static bool IsValidTimeZoneName(Isolate* isolate, 352 const std::string& id); 353 V8_WARN_UNUSED_RESULT static bool IsValidTimeZoneName(Isolate* isolate, 354 Handle<String> id); 355 356 // Function to support Temporal 357 V8_WARN_UNUSED_RESULT static std::string TimeZoneIdFromIndex(int32_t index); 358 359 V8_WARN_UNUSED_RESULT static Maybe<bool> GetTimeZoneIndex( 360 Isolate* isolate, Handle<String> identifier, int32_t* index); 361 362 V8_WARN_UNUSED_RESULT static MaybeHandle<String> CanonicalizeTimeZoneName( 363 Isolate* isolate, Handle<String> identifier); 364 365 // ecma402/#sec-coerceoptionstoobject 366 V8_WARN_UNUSED_RESULT static MaybeHandle<JSReceiver> CoerceOptionsToObject( 367 Isolate* isolate, Handle<Object> options, const char* service); 368 369 // #sec-tointlmathematicalvalue 370 // The implementation preserve the Object in String, BigInt or Number 371 V8_WARN_UNUSED_RESULT static MaybeHandle<Object> 372 ToIntlMathematicalValueAsNumberBigIntOrString(Isolate* isolate, 373 Handle<Object> input); 374 }; 375 376 } // namespace internal 377 } // namespace v8 378 379 #endif // V8_OBJECTS_INTL_OBJECTS_H_ 380