// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
/*
 *******************************************************************************
 * Copyright (C) 2010-2016, Google, Inc.; International Business Machines      *
 * Corporation and others. All Rights Reserved.                                *
 *******************************************************************************
 */

package com.ibm.icu.util;

import java.util.Collections;
import java.util.Comparator;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;
import java.util.TreeMap;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Provides an immutable list of languages/locales in priority order.
 * The string format is based on the Accept-Language format
 * (<a href="https://tools.ietf.org/html/rfc2616#section-14.4">RFC 2616 Section 14.4</a>), such as
 * "af, en, fr;q=0.9". Syntactically it is slightly
 * more lenient, in allowing extra whitespace between elements, extra commas,
 * and more than 3 decimals (on input). The qvalues must be between 0 and 1.
 *
 * <p>In theory, Accept-Language indicates the relative 'quality' of each item,
 * but in practice, all of the browsers just take an ordered list, like
 * "en, fr, de", and synthesize arbitrary quality values that put these in the
 * right order, like: "en, fr;q=0.7, de;q=0.3". The quality values in these de facto
 * semantics thus have <b>nothing</b> to do with the relative qualities of the
 * original. Accept-Language also doesn't
 * specify the interpretation of multiple instances, eg what "en, fr, en;q=.5"
 * means.
 * <p>There are various ways to build a LocalePriorityList, such
 * as using the following equivalent patterns:
 *
 * <pre>
 * list = LocalePriorityList.add("af, en, fr;q=0.9").build();
 *
 * list2 = LocalePriorityList
 *  .add(new ULocale("af"))
 *  .add(ULocale.ENGLISH)
 *  .add(ULocale.FRENCH, 0.9d)
 *  .build();
 * </pre>
 * When the list is built, the internal values are sorted in descending order by weight,
 * and then by input order.
 * That is, if two languages/locales have the same weight, the first one in the original order comes first.
 * If exactly the same language tag appears multiple times, the last one wins.
 *
 * <p>There are two options when building.
 * If preserveWeights are on, then "de;q=0.3, ja;q=0.3, en, fr;q=0.7, de " would result in the following:
 * <pre> en;q=1.0
 * de;q=1.0
 * fr;q=0.7
 * ja;q=0.3</pre>
 * If it is off (the default), then all weights are reset to 1.0 after reordering.
 * This is to match the effect of the Accept-Language semantics as used in browsers, and results in the following:
 * <pre> en;q=1.0
 * de;q=1.0
 * fr;q=1.0
 * ja;q=1.0</pre>
 * @author markdavis@google.com
 * @stable ICU 4.4
 */
public class LocalePriorityList implements Iterable<ULocale> {
    /** Shared boxed 1.0, the weight of every locale that has no explicit lower weight. */
    private static final Double D1 = 1.0d;

    /** Splits an Accept-Language style string at commas, swallowing surrounding whitespace. */
    private static final Pattern LANGUAGE_SPLITTER = Pattern.compile("\\s*,\\s*");

    /** Matches one "tag ; q = weight" item; group 1 is the tag, group 2 the weight text. */
    private static final Pattern WEIGHT_SPLITTER = Pattern
            .compile("\\s*(\\S*)\\s*;\\s*q\\s*=\\s*(\\S*)");

    /** Orders weights from highest to lowest (reverse of {@link Double#compareTo}). */
    private static final Comparator<Double> DESCENDING_DOUBLE = Collections.reverseOrder();

    /** Unmodifiable map from locale to weight; its iteration order is the priority order. */
    private final Map<ULocale, Double> languagesAndWeights;

    /**
     * Creates a Builder and adds locales, each with weight 1.0.
     *
     * @param locales locales/languages to be added
     * @return a new builder with these locales, for chaining
     * @stable ICU 4.4
     */
    public static Builder add(ULocale... locales) {
        return new Builder().add(locales);
    }

    /**
     * Creates a Builder and adds a locale with a specified weight.
     * A zero or negative weight leads to removing the locale.
     * A weight greater than 1 is pinned to 1.
     *
     * @param locale locale/language to be added
     * @param weight value from 0.0 to 1.0
     * @return a new builder with this locale, for chaining
     * @stable ICU 4.4
     */
    public static Builder add(ULocale locale, final double weight) {
        return new Builder().add(locale, weight);
    }

    /**
     * Creates a Builder and adds locales with weights.
     *
     * @param list list of locales with weights
     * @return a new builder with these locales, for chaining
     * @stable ICU 4.4
     */
    public static Builder add(LocalePriorityList list) {
        return new Builder(list);
    }

    /**
     * Creates a Builder, parses the RFC 2616 string, and adds locales with weights accordingly.
     *
     * @param acceptLanguageString String in RFC 2616 format (leniently parsed)
     * @return a new builder with these locales, for chaining
     * @stable ICU 4.4
     */
    public static Builder add(String acceptLanguageString) {
        return new Builder().add(acceptLanguageString);
    }

    /**
     * Returns the weight for a given language/locale, or null if there is none.
     * Note that the weights may be adjusted from those used to build the list.
     *
     * @param locale to get weight of
     * @return weight
     * @stable ICU 4.4
     */
    public Double getWeight(ULocale locale) {
        return languagesAndWeights.get(locale);
    }

    /**
     * Returns the locales as an immutable Set view.
     * The set has the same iteration order as this object itself.
     *
     * @return the locales
     * @stable ICU 65
     */
    public Set<ULocale> getULocales() {
        return languagesAndWeights.keySet();
    }

    /**
     * {@inheritDoc}
     * @stable ICU 4.4
     */
    @Override
    public String toString() {
        final StringBuilder result = new StringBuilder();
        for (Entry<ULocale, Double> entry : languagesAndWeights.entrySet()) {
            ULocale language = entry.getKey();
            double weight = entry.getValue();
            if (result.length() != 0) {
                result.append(", ");
            }
            result.append(language);
            // A weight of exactly 1.0 is implied and therefore omitted.
            if (weight != 1.0) {
                result.append(";q=").append(weight);
            }
        }
        return result.toString();
    }

    /**
     * {@inheritDoc}
     * @stable ICU 4.4
     */
    @Override
    public Iterator<ULocale> iterator() {
        return languagesAndWeights.keySet().iterator();
    }

    /**
     * {@inheritDoc}
     * @stable ICU 4.4
     */
    @Override
    public boolean equals(final Object o) {
        if (this == o) {
            return true;
        }
        // instanceof is false for null, so this also covers the null case.
        if (!(o instanceof LocalePriorityList)) {
            return false;
        }
        final LocalePriorityList that = (LocalePriorityList) o;
        return languagesAndWeights.equals(that.languagesAndWeights);
    }

    /**
     * {@inheritDoc}
     * @stable ICU 4.4
     */
    @Override
    public int hashCode() {
        return languagesAndWeights.hashCode();
    }

    // ==================== Privates ====================

    /**
     * Wraps the given map without copying it; the Builder passes an unmodifiable map
     * that it no longer modifies.
     */
    private LocalePriorityList(final Map<ULocale, Double> languageToWeight) {
        this.languagesAndWeights = languageToWeight;
    }

    /**
     * Class used for building LocalePriorityLists.
     * @stable ICU 4.4
     */
    public static class Builder {
        /**
         * These store the input languages and weights, in chronological order,
         * where later additions override previous ones.
         */
        private Map<ULocale, Double> languageToWeight;
        /**
         * The builder is reusable but rarely reused. Avoid cloning the map when not needed.
         * Exactly one of languageToWeight and built is null.
         */
        private LocalePriorityList built;
        /**
         * True if a weight other than 1.0 may be present. This is a conservative hint:
         * it can stay true after the weighted locale was removed, in which case
         * build() detects that nothing needs sorting.
         */
        private boolean hasWeights = false;

        /**
         * Private constructor, only used by LocalePriorityList
         */
        private Builder() {
            languageToWeight = new LinkedHashMap<>();
        }

        /**
         * Starts from an already-built list, which is returned as-is by build()
         * unless something is added first.
         */
        private Builder(LocalePriorityList list) {
            built = list;
            for (Double value : list.languagesAndWeights.values()) {
                double weight = value;
                assert 0.0 < weight && weight <= 1.0;
                if (weight != 1.0) {
                    hasWeights = true;
                    break;
                }
            }
        }

        /**
         * Creates a LocalePriorityList.  This is equivalent to
         * {@link Builder#build(boolean) Builder.build(false)}.
         *
         * @return A LocalePriorityList
         * @stable ICU 4.4
         */
        public LocalePriorityList build() {
            return build(false);
        }

        /**
         * Creates a LocalePriorityList.
         * Locales are ordered by descending weight, then by input order.
         * Unless preserveWeights is true, every weight in the result is 1.0.
         *
         * <p>If nothing was added since the previous build (or since this builder was
         * created from an existing list), the same list is returned again.
         *
         * @param preserveWeights when true, each locale's given weight is preserved.
         * @return A LocalePriorityList
         * @stable ICU 4.4
         */
        public LocalePriorityList build(boolean preserveWeights) {
            if (built != null) {
                // Calling build() again without changing anything in between.
                // Just return the same immutable list.
                return built;
            }
            Map<ULocale, Double> temp;
            if (!hasWeights) {
                // Nothing to sort.
                temp = languageToWeight;
            } else {
                final TreeMap<Double, List<ULocale>> weightToLanguages =
                        groupByDescendingWeight(languageToWeight);
                if (weightToLanguages.size() <= 1) {
                    // There is at most one weight, so the input order is already the result order.
                    temp = languageToWeight;
                    if (weightToLanguages.isEmpty() || weightToLanguages.firstKey() == 1.0) {
                        hasWeights = false;
                    } else if (!preserveWeights) {
                        // A single shared weight below 1.0 must still be reset to 1.0,
                        // the same as in the multi-weight case below.
                        for (Entry<ULocale, Double> entry : temp.entrySet()) {
                            entry.setValue(D1);
                        }
                        hasWeights = false;
                    }
                } else {
                    // Re-insert in weight order; within one weight the input order is kept.
                    temp = new LinkedHashMap<>();
                    for (Entry<Double, List<ULocale>> langEntry : weightToLanguages.entrySet()) {
                        final Double weight = preserveWeights ? langEntry.getKey() : D1;
                        for (final ULocale lang : langEntry.getValue()) {
                            temp.put(lang, weight);
                        }
                    }
                    if (!preserveWeights) {
                        hasWeights = false;
                    }
                }
            }
            languageToWeight = null;
            return built = new LocalePriorityList(Collections.unmodifiableMap(temp));
        }

        /**
         * Collects the locales that share a weight, keyed from highest to lowest weight.
         * Each list keeps the iteration order of the input map.
         */
        private static TreeMap<Double, List<ULocale>> groupByDescendingWeight(
                Map<ULocale, Double> languageToWeight) {
            final TreeMap<Double, List<ULocale>> weightToLanguages =
                    new TreeMap<>(DESCENDING_DOUBLE);
            for (Entry<ULocale, Double> entry : languageToWeight.entrySet()) {
                final Double weight = entry.getValue();
                List<ULocale> sameWeight = weightToLanguages.get(weight);
                if (sameWeight == null) {
                    sameWeight = new LinkedList<>();
                    weightToLanguages.put(weight, sameWeight);
                }
                sameWeight.add(entry.getKey());
            }
            return weightToLanguages;
        }

        /**
         * Adds locales with weights.
         *
         * @param list list of locales with weights
         * @return this, for chaining
         * @stable ICU 4.4
         */
        public Builder add(final LocalePriorityList list) {
            for (Entry<ULocale, Double> entry : list.languagesAndWeights.entrySet()) {
                add(entry.getKey(), entry.getValue());
            }
            return this;
        }

        /**
         * Adds a locale with weight 1.0.
         *
         * @param locale to add with weight 1.0
         * @return this, for chaining
         * @stable ICU 4.4
         */
        public Builder add(final ULocale locale) {
            return add(locale, 1.0);
        }

        /**
         * Adds locales, each with weight 1.0.
         *
         * @param locales locales/languages to be added
         * @return this, for chaining.
         * @stable ICU 4.4
         */
        public Builder add(ULocale... locales) {
            for (final ULocale languageCode : locales) {
                add(languageCode, 1.0);
            }
            return this;
        }

        /**
         * Adds a locale with a specified weight.
         * Overrides any previous weight for the locale.
         * A zero or negative weight leads to removing the locale.
         * A weight greater than 1 is pinned to 1.
         *
         * @param locale language/locale to add
         * @param weight value between 0.0 and 1.0
         * @return this, for chaining.
         * @stable ICU 4.4
         */
        public Builder add(final ULocale locale, double weight) {
            if (languageToWeight == null) {
                // Builder reuse after build().
                languageToWeight = new LinkedHashMap<>(built.languagesAndWeights);
                built = null;
            }
            // Remove any earlier entry first: put() on an existing key would keep its old
            // position, but a re-added locale must move to the end of the input order.
            languageToWeight.remove(locale);
            if (weight <= 0.0) {
                return this; // skip zeros
            }
            final Double value;
            if (weight >= 1.0) {
                value = D1;
            } else {
                value = weight;
                hasWeights = true;
            }
            languageToWeight.put(locale, value);
            return this;
        }

        /**
         * Parses the RFC 2616 string, and adds locales with weights accordingly.
         * Empty items (for example from extra commas) are ignored.
         *
         * @param acceptLanguageList in RFC 2616 format (leniently parsed)
         * @return this, for chaining.
         * @throws IllegalArgumentException if a weight is not within 0..1
         * @stable ICU 4.4
         */
        public Builder add(final String acceptLanguageList) {
            final String[] items = LANGUAGE_SPLITTER.split(acceptLanguageList.trim());
            final Matcher itemMatcher = WEIGHT_SPLITTER.matcher("");
            for (final String item : items) {
                if (itemMatcher.reset(item).matches()) {
                    final ULocale language = new ULocale(itemMatcher.group(1));
                    final double weight = Double.parseDouble(itemMatcher.group(2));
                    if (!(0.0 <= weight && weight <= 1.0)) { // do ! for NaN
                        throw new IllegalArgumentException(
                                "Illegal weight, must be 0..1: " + weight);
                    }
                    add(language, weight);
                } else if (item.length() != 0) {
                    add(new ULocale(item));
                }
            }
            return this;
        }
    }
}