1 package org.unicode.cldr.util.personname; 2 3 import java.util.ArrayList; 4 import java.util.Arrays; 5 import java.util.Collection; 6 import java.util.Comparator; 7 import java.util.EnumSet; 8 import java.util.HashSet; 9 import java.util.Iterator; 10 import java.util.LinkedHashSet; 11 import java.util.List; 12 import java.util.Map; 13 import java.util.Map.Entry; 14 import java.util.Objects; 15 import java.util.Set; 16 import java.util.TreeMap; 17 import java.util.TreeSet; 18 import java.util.regex.Pattern; 19 20 import org.unicode.cldr.util.CLDRFile; 21 import org.unicode.cldr.util.ChainedMap; 22 import org.unicode.cldr.util.ChainedMap.M3; 23 import org.unicode.cldr.util.LanguageTagParser; 24 import org.unicode.cldr.util.Pair; 25 import org.unicode.cldr.util.XPathParts; 26 27 import com.google.common.base.Joiner; 28 import com.google.common.base.Splitter; 29 import com.google.common.collect.Comparators; 30 import com.google.common.collect.ComparisonChain; 31 import com.google.common.collect.ImmutableBiMap; 32 import com.google.common.collect.ImmutableList; 33 import com.google.common.collect.ImmutableListMultimap; 34 import com.google.common.collect.ImmutableMap; 35 import com.google.common.collect.ImmutableSet; 36 import com.google.common.collect.LinkedListMultimap; 37 import com.google.common.collect.ListMultimap; 38 import com.google.common.collect.Maps; 39 import com.google.common.collect.Multimap; 40 import com.google.common.collect.Multiset; 41 import com.google.common.collect.TreeMultimap; 42 import com.google.common.collect.TreeMultiset; 43 import com.ibm.icu.lang.UCharacter; 44 import com.ibm.icu.text.BreakIterator; 45 import com.ibm.icu.text.CaseMap; 46 import com.ibm.icu.text.MessageFormat; 47 import com.ibm.icu.util.Output; 48 import com.ibm.icu.util.ULocale; 49 50 /** 51 * Rough sketch for now 52 * TODO Mark Make classes/methods private that don't need to be public 53 * TODO Peter Check for invalid parameters 54 */ 55 56 public class 
PersonNameFormatter { 57 58 public static final boolean DEBUG = System.getProperty("PersonNameFormatter.DEBUG") != null; 59 60 public enum Field { 61 prefix, 62 given, 63 given2, 64 surname, 65 surname2, 66 suffix; 67 public static final Comparator<Iterable<Field>> ITERABLE_COMPARE = Comparators.lexicographical(Comparator.<Field>naturalOrder()); 68 public static final Set<Field> ALL = ImmutableSet.copyOf(Field.values()); 69 } 70 71 public enum Order { 72 givenFirst, 73 surnameFirst, 74 sorting; 75 public static final Comparator<Iterable<Order>> ITERABLE_COMPARE = Comparators.lexicographical(Comparator.<Order>naturalOrder()); 76 public static final Set<Order> ALL = ImmutableSet.copyOf(Order.values()); 77 /** 78 * Use this instead of valueOf if value might be null 79 */ from(String item)80 public static Order from(String item) { 81 return item == null ? null : Order.valueOf(item); 82 } 83 } 84 85 public enum Length { 86 // There is a slight complication because 'long' collides with a keyword. 87 long_name, 88 medium, 89 short_name; 90 91 private static ImmutableBiMap<String,Length> exceptionNames = ImmutableBiMap.of( 92 "long", long_name, 93 "short", short_name); 94 95 /** 96 * Use this instead of valueOf 97 */ from(String item)98 public static Length from(String item) { 99 if (item == null) { 100 return null; 101 } 102 Length result = exceptionNames.get(item); 103 return result != null ? result : Length.valueOf(item); 104 } 105 @Override toString()106 public String toString() { 107 String result = exceptionNames.inverse().get(this); 108 return result != null ? 
result : name(); 109 } 110 111 public static final Comparator<Iterable<Length>> ITERABLE_COMPARE = Comparators.lexicographical(Comparator.<Length>naturalOrder()); 112 public static final Set<Length> ALL = ImmutableSet.copyOf(Length.values()); 113 } 114 115 public enum Usage { 116 referring, 117 addressing, 118 monogram; 119 public static final Comparator<Iterable<Usage>> ITERABLE_COMPARE = Comparators.lexicographical(Comparator.<Usage>naturalOrder()); 120 public static final Set<Usage> ALL = ImmutableSet.copyOf(Usage.values()); 121 /** 122 * Use this instead of valueOf if value might be null 123 */ from(String item)124 public static Usage from(String item) { 125 return item == null ? null : Usage.valueOf(item); 126 } 127 } 128 129 public enum Formality { 130 formal, 131 informal; 132 public static final Comparator<Iterable<Formality>> ITERABLE_COMPARE = Comparators.lexicographical(Comparator.<Formality>naturalOrder()); 133 public static final Set<Formality> ALL = ImmutableSet.copyOf(Formality.values()); 134 /** 135 * Use this instead of valueOf if value might be null 136 */ from(String item)137 public static Formality from(String item) { 138 return item == null ? 
null : Formality.valueOf(item); 139 } 140 } 141 142 public enum Modifier { 143 informal, 144 allCaps, 145 initialCap, 146 initial, 147 monogram, 148 prefix, 149 core, 150 ; 151 public static final Comparator<Iterable<Modifier>> ITERABLE_COMPARE = Comparators.lexicographical(Comparator.<Modifier>naturalOrder()); 152 public static final Comparator<Collection<Modifier>> LONGEST_FIRST = new Comparator<>() { 153 154 @Override 155 public int compare(Collection<Modifier> o1, Collection<Modifier> o2) { 156 return ComparisonChain.start() 157 .compare(o2.size(), o1.size()) // reversed order for longest first 158 .compare(o1, o2, ITERABLE_COMPARE) 159 .result(); 160 } 161 162 }; 163 public static final Set<Modifier> ALL = ImmutableSet.copyOf(Modifier.values()); 164 public static final Set<Modifier> EMPTY = ImmutableSet.of(); 165 166 static final Set<Set<Modifier>> INCONSISTENT_SETS = ImmutableSet.of( 167 ImmutableSet.of(Modifier.core, Modifier.prefix), 168 ImmutableSet.of(Modifier.initial, Modifier.monogram), 169 ImmutableSet.of(Modifier.allCaps, Modifier.initialCap) 170 ); 171 172 /** 173 * If the input modifiers are consistent, returns an ordered set; if not, returns null and sets an error message. 
174 */ getCleanSet(Collection<Modifier> modifierList, Output<String> errorMessage)175 public static Set<Modifier> getCleanSet(Collection<Modifier> modifierList, Output<String> errorMessage) { 176 if (modifierList.isEmpty()) { 177 return ImmutableSet.of(); 178 } 179 Set<Modifier> modifiers = EnumSet.copyOf(modifierList); 180 String errorMessage1 = null; 181 if (modifiers.size() != modifierList.size()) { 182 Multiset<Modifier> dupCheck = TreeMultiset.create(); 183 dupCheck.addAll(modifierList); 184 for (Modifier m : modifiers) { 185 dupCheck.remove(m); 186 } 187 errorMessage1 = "Duplicate modifiers: " + JOIN_COMMA.join(dupCheck); 188 } 189 String errorMessage2 = null; 190 for (Set<Modifier> inconsistentSet : INCONSISTENT_SETS) { 191 if (modifiers.containsAll(inconsistentSet)) { 192 if (errorMessage2 == null) { 193 errorMessage2 = "Inconsistent modifiers: "; 194 } else { 195 errorMessage2 += ", "; 196 } 197 errorMessage2 += inconsistentSet; 198 } 199 } 200 errorMessage.value = errorMessage1 == null ? errorMessage2 201 : errorMessage2 == null ? errorMessage1 202 : errorMessage1 + "; " + errorMessage1; 203 return ImmutableSet.copyOf(modifiers); 204 } 205 206 /** 207 * Verifies that the prefix, core, and plain values are consistent. Returns null if ok, otherwise error message. 
208 */ inconsistentPrefixCorePlainValues(String prefixValue, String coreValue, String plainValue)209 public static String inconsistentPrefixCorePlainValues(String prefixValue, String coreValue, String plainValue) { 210 String errorMessage2 = null; 211 if (prefixValue != null) { 212 if (coreValue != null) { 213 if (plainValue != null) { // prefix = X, core = Y, plain = Z 214 // ok: prefix = "van", core = "Berg", plain = "van Berg" 215 // bad: prefix = "van", core = "Berg", plain = "van Wolf" 216 if (!plainValue.replace(prefixValue, "").trim().equals(coreValue)) { 217 errorMessage2 = "-core value and -prefix value are inconsistent with plain value"; 218 } 219 } 220 // otherwise prefix = "x", core = "y", plain = null, so OK 221 } else { // prefix = X, core = null, plain = ? 222 errorMessage2 = "cannot have -prefix without -core"; 223 } 224 } else if (coreValue != null && plainValue != null && !plainValue.equals(coreValue)) { 225 errorMessage2 = "There is no -prefix, but there is a -core and plain that are unequal"; 226 } 227 return errorMessage2; 228 } 229 } 230 231 /** 232 * Types of samples, only for use by CLDR 233 * @internal 234 */ 235 public enum SampleType { 236 givenOnly, 237 givenSurnameOnly, 238 given12Surname, 239 full, 240 foreign; 241 public static final Set<SampleType> ALL = ImmutableSet.of(givenOnly, 242 givenSurnameOnly, 243 given12Surname, 244 full); // exclude foreign for now 245 } 246 247 /** 248 * @internal (all of these) 249 */ 250 public static final Splitter SPLIT_SPACE = Splitter.on(' ').trimResults(); 251 public static final Splitter SPLIT_DASH = Splitter.on('-').trimResults(); 252 public static final Splitter SPLIT_EQUALS = Splitter.on('=').trimResults(); 253 public static final Splitter SPLIT_COMMA = Splitter.on(',').trimResults(); 254 public static final Splitter SPLIT_SEMI = Splitter.on(';').trimResults(); 255 256 public static final Joiner JOIN_SPACE = Joiner.on(' '); 257 public static final Joiner JOIN_DASH = Joiner.on('-'); 258 public 
static final Joiner JOIN_SEMI = Joiner.on("; "); 259 public static final Joiner JOIN_COMMA = Joiner.on(", "); 260 public static final Joiner JOIN_LFTB = Joiner.on("\n\t\t"); 261 262 /** 263 * A Field and its modifiers, corresponding to a string form like {given-initial}. 264 * Immutable 265 */ 266 public static class ModifiedField implements Comparable<ModifiedField> { 267 private final Field field; 268 private final Set<Modifier> modifiers; 269 getField()270 public Field getField() { 271 return field; 272 } getModifiers()273 public Set<Modifier> getModifiers() { 274 return modifiers; 275 } 276 ModifiedField(Field field, Collection<Modifier> modifiers)277 public ModifiedField(Field field, Collection<Modifier> modifiers) { 278 this.field = field; 279 Output<String> errorMessage = new Output<>(); 280 this.modifiers = Modifier.getCleanSet(modifiers, errorMessage); 281 if (errorMessage.value != null) { 282 throw new IllegalArgumentException(errorMessage.value); 283 } 284 } 285 286 /** convenience method for testing */ ModifiedField(Field field, Modifier... modifiers)287 public ModifiedField(Field field, Modifier... 
modifiers) { 288 this(field, Arrays.asList(modifiers)); 289 } 290 291 /** convenience method for testing */ from(String string)292 public static ModifiedField from(String string) { 293 Field field = null; 294 List<Modifier> modifiers = new ArrayList<>(); 295 for (String item : SPLIT_DASH.split(string)) { 296 if (field == null) { 297 field = Field.valueOf(item); 298 } else { 299 modifiers.add(Modifier.valueOf(item)); 300 } 301 } 302 return new ModifiedField(field, modifiers); 303 } 304 305 @Override toString()306 public String toString() { 307 StringBuilder result = new StringBuilder(); 308 result.append(field); 309 if (!modifiers.isEmpty()) { 310 result.append('-').append(JOIN_DASH.join(modifiers)); 311 } 312 return result.toString(); 313 } 314 @Override equals(Object obj)315 public boolean equals(Object obj) { 316 ModifiedField that = (ModifiedField) obj; 317 return field == that.field && modifiers.equals(that.modifiers); 318 } 319 @Override hashCode()320 public int hashCode() { 321 return field.hashCode() ^ modifiers.hashCode(); 322 } 323 @Override compareTo(ModifiedField o)324 public int compareTo(ModifiedField o) { 325 return ComparisonChain.start() 326 .compare(field, o.field) 327 .compare(modifiers, o.modifiers, Modifier.ITERABLE_COMPARE) 328 .result(); 329 } 330 } 331 332 /** 333 * An element of a name pattern: either a literal string (like ", ") or a modified field (like {given-initial}) 334 * The literal is null IFF the modifiedField is not null 335 * Immutable 336 * @internal 337 */ 338 public static class NamePatternElement implements Comparable<NamePatternElement> { 339 private final String literal; 340 private final ModifiedField modifiedField; 341 342 getLiteral()343 public String getLiteral() { 344 return literal; 345 } getModifiedField()346 public ModifiedField getModifiedField() { 347 return modifiedField; 348 } 349 350 /** 351 * @param literal 352 * @param field 353 * @param modifiers 354 */ NamePatternElement(ModifiedField modifiedField)355 
public NamePatternElement(ModifiedField modifiedField) { 356 this.literal = null; 357 this.modifiedField = modifiedField; 358 } 359 NamePatternElement(String literal)360 public NamePatternElement(String literal) { 361 this.literal = literal; 362 this.modifiedField = null; 363 } 364 365 /** convenience method for testing */ from(Object element)366 public static NamePatternElement from(Object element) { 367 if (element instanceof ModifiedField) { 368 return new NamePatternElement((ModifiedField) element); 369 } else { 370 String string = element.toString(); 371 if (string.startsWith("{") && string.endsWith("}")) { 372 return new NamePatternElement(ModifiedField.from(string.substring(1, string.length()-1))); 373 } else { 374 return new NamePatternElement(string); 375 } 376 } 377 } 378 @Override toString()379 public String toString() { 380 return literal != null ? literal.replace("\\", "\\\\").replace("{", "\\{") : modifiedField.toString(); 381 } 382 383 public static final Comparator<Iterable<NamePatternElement>> ITERABLE_COMPARE = Comparators.lexicographical(Comparator.<NamePatternElement>naturalOrder()); 384 385 386 @Override compareTo(NamePatternElement o)387 public int compareTo(NamePatternElement o) { 388 if (literal != null && o.literal != null) { 389 return literal.compareTo(o.literal); 390 } else if (modifiedField != null && o.modifiedField != null) { 391 return modifiedField.compareTo(o.modifiedField); 392 } else { 393 return literal != null ? -1 : 1; // all literals are less than all modified fields 394 } 395 } 396 } 397 398 /** 399 * Format fallback results, for when modifiers are not found 400 * NOTE: CLDR needs to be able to create from data. 
401 * @internal 402 */ 403 public static class FallbackFormatter { 404 final private ULocale formatterLocale; 405 final private BreakIterator characterBreakIterator; 406 final private MessageFormat initialFormatter; 407 final private MessageFormat initialSequenceFormatter; 408 final private String foreignSpaceReplacement; 409 getForeignSpaceReplacement()410 public String getForeignSpaceReplacement() { 411 return foreignSpaceReplacement; 412 } 413 414 final private boolean uppercaseSurnameIfSurnameFirst; 415 FallbackFormatter(ULocale uLocale, String initialPattern, String initialSequencePattern, String foreignSpaceReplacement, boolean uppercaseSurnameIfSurnameFirst)416 public FallbackFormatter(ULocale uLocale, 417 String initialPattern, 418 String initialSequencePattern, 419 String foreignSpaceReplacement, 420 boolean uppercaseSurnameIfSurnameFirst) { 421 formatterLocale = uLocale; 422 characterBreakIterator = BreakIterator.getCharacterInstance(uLocale); 423 initialFormatter = new MessageFormat(initialPattern); 424 initialSequenceFormatter = new MessageFormat(initialSequencePattern); 425 this.foreignSpaceReplacement = foreignSpaceReplacement; 426 this.uppercaseSurnameIfSurnameFirst = uppercaseSurnameIfSurnameFirst; 427 } 428 429 /** 430 * Apply the fallbacks for modifiers that are not handled. Public for testing. 
431 * @internal 432 */ applyModifierFallbacks(FormatParameters nameFormatParameters, Set<Modifier> remainingModifers, String bestValue)433 public String applyModifierFallbacks(FormatParameters nameFormatParameters, Set<Modifier> remainingModifers, String bestValue) { 434 // apply default algorithms 435 436 for (Modifier modifier : remainingModifers) { 437 switch(modifier) { 438 case initial: 439 bestValue = formatInitial(bestValue, nameFormatParameters); 440 break; 441 case monogram: 442 bestValue = formatMonogram(bestValue, nameFormatParameters); 443 break; 444 case initialCap: 445 bestValue = TO_TITLE_WHOLE_STRING_NO_LOWERCASE.apply(formatterLocale.toLocale(), null, bestValue); 446 break; 447 case allCaps: 448 bestValue = UCharacter.toUpperCase(formatterLocale, bestValue); 449 break; 450 case prefix: 451 bestValue = null; 452 // TODO Mark if there is no plain, but there is a prefix and core, use that; otherwise use core 453 break; 454 case core: 455 case informal: 456 // no option, just fall back 457 break; 458 } 459 } 460 return bestValue; 461 } 462 formatInitial(String bestValue, FormatParameters nameFormatParameters)463 public String formatInitial(String bestValue, FormatParameters nameFormatParameters) { 464 // It is probably unusual to have multiple name fields, so this could be optimized for 465 // the simpler case. 
466 467 // Employ both the initialFormatter and initialSequenceFormatter 468 469 String result = null; 470 for(String part : SPLIT_SPACE.split(bestValue)) { 471 String partFirst = getFirstGrapheme(part); 472 bestValue = initialFormatter.format(new String[] {partFirst}); 473 if (result == null) { 474 result = bestValue; 475 } else { 476 result = initialSequenceFormatter.format(new String[] {result, bestValue}); 477 } 478 } 479 return result; 480 } 481 formatMonogram(String bestValue, FormatParameters nameFormatParameters)482 public String formatMonogram(String bestValue, FormatParameters nameFormatParameters) { 483 // It is probably unusual to have multiple name fields, so this could be optimized for 484 // the simpler case. 485 486 // For the case of monograms, don't use the initialFormatter or initialSequenceFormatter 487 // And just take the first grapheme. 488 489 return getFirstGrapheme(bestValue); 490 } 491 492 getFirstGrapheme(String bestValue)493 private String getFirstGrapheme(String bestValue) { 494 characterBreakIterator.setText(bestValue); 495 bestValue = bestValue.substring(0, characterBreakIterator.next()); 496 return bestValue; 497 } 498 formatAllCaps(String bestValue)499 public String formatAllCaps(String bestValue) { 500 return UCharacter.toUpperCase(formatterLocale, bestValue); 501 } 502 503 /** 504 * Apply other modifications. Currently just the surname capitalization, but can be extended in the future. 
505 * @param modifiedField 506 */ tweak(ModifiedField modifiedField, String bestValue, FormatParameters nameFormatParameters)507 public String tweak(ModifiedField modifiedField, String bestValue, FormatParameters nameFormatParameters) { 508 if (uppercaseSurnameIfSurnameFirst 509 && nameFormatParameters.matchesOrder(Order.surnameFirst) 510 && (modifiedField.getField() == Field.surname || modifiedField.getField() == Field.surname2)) { 511 bestValue = UCharacter.toUpperCase(formatterLocale, bestValue); 512 } 513 return bestValue; 514 } 515 } 516 517 /** 518 * A name pattern, corresponding to a string such as "{given-initial} {surname}" 519 * Immutable 520 * NOTE: CLDR needs to be able to create from data. 521 * @internal 522 */ 523 public static class NamePattern implements Comparable<NamePattern> { 524 private final int rank; 525 private final List<NamePatternElement> elements; 526 private final Set<Field> fields; 527 getFields()528 public Set<Field> getFields() { 529 return ImmutableSet.copyOf(fields); 530 } 531 getFieldsSize()532 public int getFieldsSize() { 533 return fields.size(); 534 } 535 536 /** 537 * Return the rank order (0, 1, ...) 
in a list 538 * @return 539 */ getRank()540 public int getRank() { 541 return rank; 542 } 543 format(NameObject nameObject, FormatParameters nameFormatParameters, FallbackFormatter fallbackInfo)544 public String format(NameObject nameObject, FormatParameters nameFormatParameters, FallbackFormatter fallbackInfo) { 545 StringBuilder result = new StringBuilder(); 546 boolean seenLeadingField = false; 547 boolean seenEmptyLeadingField = false; 548 boolean seenEmptyField = false; 549 StringBuilder literalTextBefore = new StringBuilder(); 550 StringBuilder literalTextAfter = new StringBuilder(); 551 552 for (NamePatternElement element : elements) { 553 final String literal = element.getLiteral(); 554 if (literal != null) { 555 if (seenEmptyLeadingField) { 556 // do nothing; throw away the literal text 557 } else if (seenEmptyField) { 558 literalTextAfter.append(literal); 559 } else { 560 literalTextBefore.append(literal); 561 } 562 } else { 563 String bestValue = getBestValueForNameObject(nameObject, element, nameFormatParameters, fallbackInfo); 564 if (bestValue == null) { 565 if (!seenLeadingField) { 566 seenEmptyLeadingField = true; 567 literalTextBefore.setLength(0); 568 } else { 569 seenEmptyField = true; 570 literalTextAfter.setLength(0); 571 } 572 } else { 573 seenLeadingField = true; 574 seenEmptyLeadingField = false; 575 if (seenEmptyField) { 576 result.append(coalesceLiterals(literalTextBefore, literalTextAfter)); 577 result.append(bestValue); 578 seenEmptyField = false; 579 } else { 580 result.append(literalTextBefore); 581 literalTextBefore.setLength(0); 582 result.append(bestValue); 583 } 584 } 585 } 586 } 587 if (!seenEmptyField) { 588 result.append(literalTextBefore); 589 } 590 if (fallbackInfo.foreignSpaceReplacement != null && !fallbackInfo.foreignSpaceReplacement.equals(" ")) { 591 ULocale nameLocale = nameObject.getNameLocale(); 592 if (!sharesLanguageScript(nameLocale, fallbackInfo.formatterLocale)) { 593 return 
SPACES.matcher(result).replaceAll(fallbackInfo.foreignSpaceReplacement); 594 } 595 } 596 return result.toString(); 597 } 598 sharesLanguageScript(ULocale nameLocale, ULocale formatterLocale)599 private boolean sharesLanguageScript(ULocale nameLocale, ULocale formatterLocale) { 600 return Objects.equals(nameLocale, formatterLocale); // TODO, fix to check language and script (maximized) 601 } 602 603 static final Pattern SPACES = Pattern.compile("\\s+"); // TODO pick whitespace 604 getBestValueForNameObject(NameObject nameObject, NamePatternElement element, FormatParameters nameFormatParameters, FallbackFormatter fallbackInfo)605 private String getBestValueForNameObject(NameObject nameObject, NamePatternElement element, FormatParameters nameFormatParameters, FallbackFormatter fallbackInfo) { 606 Set<Modifier> remainingModifers = EnumSet.noneOf(Modifier.class); 607 final ModifiedField modifiedField = element.getModifiedField(); 608 String bestValue = nameObject.getBestValue(modifiedField, remainingModifers); 609 if (bestValue == null) { 610 return null; 611 } 612 if (!remainingModifers.isEmpty()) { 613 bestValue = fallbackInfo.applyModifierFallbacks(nameFormatParameters, remainingModifers, bestValue); 614 } 615 return fallbackInfo.tweak(modifiedField, bestValue, nameFormatParameters); 616 } 617 coalesceLiterals(StringBuilder l1, StringBuilder l2)618 private String coalesceLiterals(StringBuilder l1, StringBuilder l2) { 619 // get the range of nonwhitespace characters at the beginning of l1 620 int p1 = 0; 621 while (p1 < l1.length() && !Character.isWhitespace(l1.charAt(p1))) { 622 ++p1; 623 } 624 625 // get the range of nonwhitespace characters at the end of l2 626 int p2 = l2.length() - 1; 627 while (p2 >= 0 && !Character.isWhitespace(l2.charAt(p2))) { 628 --p2; 629 } 630 631 // also include one whitespace character from l1 or, if there aren't 632 // any, one whitespace character from l2 633 if (p1 < l1.length()) { 634 ++p1; 635 } else if (p2 >= 0) { 636 --p2; 637 } 
638 639 // concatenate those two ranges to get the coalesced literal text 640 String result = l1.substring(0, p1) + l2.substring(p2 + 1); 641 642 // clear out l1 and l2 (done here to improve readability in format() above)) 643 l1.setLength(0); 644 l2.setLength(0); 645 646 return result; 647 } 648 NamePattern(int rank, List<NamePatternElement> elements)649 public NamePattern(int rank, List<NamePatternElement> elements) { 650 this.rank = rank; 651 this.elements = elements; 652 Set<Field> result = EnumSet.noneOf(Field.class); 653 for (NamePatternElement element : elements) { 654 ModifiedField modifiedField = element.getModifiedField(); 655 if (modifiedField != null) { 656 result.add(modifiedField.getField()); 657 } 658 } 659 this.fields = ImmutableSet.copyOf(result); 660 } 661 662 /** convenience method for testing */ from(int rank, Object... elements)663 public static NamePattern from(int rank, Object... elements) { 664 return new NamePattern(rank, makeList(elements)); 665 } 666 667 /** convenience method for testing */ from(int rank, String patternString)668 public static NamePattern from(int rank, String patternString) { 669 return new NamePattern(rank, parse(patternString)); 670 } 671 672 private static final Set<Character> ALLOWED_ESCAPED_CHARACTERS = new HashSet<>(Arrays.asList('\\', '{', '}')); 673 parse(String patternString)674 private static List<NamePatternElement> parse(String patternString) { 675 List<NamePatternElement> result = new ArrayList<>(); 676 677 String rawValue = ""; 678 Boolean curlyStarted = false; 679 final int patternLength = patternString.length(); 680 int i = 0; 681 while (i < patternLength) { 682 final Character currentCharacter = patternString.charAt(i); // this is safe, since syntax is ASCII 683 684 switch (currentCharacter) { 685 case '\\': 686 if (i + 1 < patternLength) { 687 final Character nextCharacter = patternString.charAt(i + 1); 688 if (!ALLOWED_ESCAPED_CHARACTERS.contains(nextCharacter)) { 689 
throwParseError(String.format("Escaping character '%c' is not supported", nextCharacter), patternString, i); 690 } 691 692 rawValue += nextCharacter; 693 i += 2; 694 continue; 695 } else { 696 throwParseError("Invalid character: ", patternString, i); 697 } 698 699 case '{': 700 if (curlyStarted) { 701 throwParseError("Unexpected {: ", patternString, i); 702 } 703 curlyStarted = true; 704 if (!rawValue.isEmpty()) { 705 result.add(new NamePatternElement(rawValue)); 706 rawValue = ""; 707 } 708 break; 709 710 case '}': 711 if (!curlyStarted) { 712 throwParseError("Unexpected }", patternString, i); 713 } 714 curlyStarted = false; 715 if (rawValue.isEmpty()) { 716 throwParseError("Empty field '{}' is not allowed ", patternString, i); 717 } else { 718 try { 719 result.add(new NamePatternElement(ModifiedField.from(rawValue))); 720 } catch (Exception e) { 721 throwParseError("Invalid field: ", rawValue, 0); 722 } 723 rawValue = ""; 724 } 725 break; 726 727 default: 728 rawValue += currentCharacter; 729 break; 730 } 731 732 i++; 733 } 734 735 if (curlyStarted) { 736 throwParseError("Unmatched {", patternString, patternString.length()); 737 } 738 if (!rawValue.isEmpty()) { 739 result.add(new NamePatternElement(rawValue)); 740 } 741 742 return result; 743 } 744 745 private static String BAD_POSITION = "❌"; 746 throwParseError(String message, String patternString, int i)747 private static void throwParseError(String message, String patternString, int i) { 748 throw new IllegalArgumentException(message + ": " + "«" + patternString.substring(0,i) + BAD_POSITION + patternString.substring(i) + "»"); 749 } 750 makeList(Object... elements2)751 private static List<NamePatternElement> makeList(Object... 
elements2) { 752 List<NamePatternElement> result = new ArrayList<>(); 753 for (Object element : elements2) { 754 result.add(NamePatternElement.from(element)); 755 } 756 return result; 757 } 758 759 @Override toString()760 public String toString() { 761 StringBuilder result = new StringBuilder("\""); 762 for (NamePatternElement element : elements) { 763 if (element.literal != null) { 764 for (final Character c : element.literal.toCharArray()) { 765 if (ALLOWED_ESCAPED_CHARACTERS.contains(c)) { 766 result.append('\\'); 767 } 768 result.append(c); 769 } 770 } else { 771 result.append('{').append(element).append('}'); 772 } 773 } 774 return result.append("\"").toString(); 775 } 776 public static final Comparator<Iterable<NamePattern>> ITERABLE_COMPARE = Comparators.lexicographical(Comparator.<NamePattern>naturalOrder()); 777 778 @Override 779 /** 780 * Compares first by fields, then by the string value (later case would be unusual) 781 */ compareTo(NamePattern o)782 public int compareTo(NamePattern o) { 783 return ComparisonChain.start() 784 .compare(rank, o.rank) 785 .compare(fields, o.fields, Field.ITERABLE_COMPARE) 786 .compare(elements, o.elements, NamePatternElement.ITERABLE_COMPARE) 787 .result(); 788 } 789 @Override equals(Object obj)790 public boolean equals(Object obj) { 791 return compareTo((NamePattern)obj) == 0; // no need to optimize 792 } 793 @Override hashCode()794 public int hashCode() { 795 return Objects.hash(rank, fields, elements); 796 } 797 798 /** 799 * Utility for testing validity 800 * @return 801 */ getFieldPositions()802 public Multimap<Field, Integer> getFieldPositions() { 803 Multimap<Field, Integer> result = TreeMultimap.create(); 804 int i = -1; 805 for (NamePatternElement element : elements) { 806 ++i; 807 if (element.literal == null) { 808 result.put(element.modifiedField.field, i); 809 } 810 } 811 return result; 812 } 813 814 /** 815 * Get the number of elements (literals and modified fields) in the pattern. 
*/
        public int getElementCount() {
            return elements.size();
        }

        /**
         * Get the nth literal (or null if the nth element is a field)
         */
        public String getLiteral(int index) {
            return elements.get(index).literal;
        }

        /**
         * Get the nth modified field (or null if the nth element is a literal)
         */
        public ModifiedField getModifiedField(int index) {
            return elements.get(index).modifiedField;
        }

        /**
         * Returns the first literal element that contains the given text, or null if there is none.
         * @internal
         */
        public String firstLiteralContaining(String item) {
            for (NamePatternElement element : elements) {
                final String literal = element.literal;
                if (literal != null && literal.contains(item)) {
                    return literal;
                }
            }
            return null;
        }
    }

    /**
     * Input parameters, such as {length=long_name, formality=informal}. Unmentioned items are null, and match any value.
     * Passed in when formatting.
     */
    public static class FormatParameters implements Comparable<FormatParameters> {
        private final Order order;
        private final Length length;
        private final Usage usage;
        private final Formality formality;

        /**
         * Normally we don't often need to create one FormatParameters from another.
         * The one exception is the order, which comes from the NameObject.
         * @return a new FormatParameters with the given order and this object's other values
         */
        public FormatParameters setOrder(Order order) {
            return new FormatParameters(order, length, usage, formality);
        }

        /**
         * Get the order; null means "any order"
         */
        public Order getOrder() {
            return order;
        }

        /**
         * Get the length; null means "any length"
         */
        public Length getLength() {
            return length;
        }

        /**
         * Get the usage; null means "any usage"
         */
        public Usage getUsage() {
            return usage;
        }

        /**
         * Get the formality; null means "any formality"
         */
        public Formality getFormality() {
            return formality;
        }

        /**
         * Returns true if every one of the four values matches the corresponding value in other,
         * where a null on either side matches anything.
         */
        public boolean matches(FormatParameters other) {
            return matchesOrder(other.order)
                && matchesLength(other.length)
                && matchesUsage(other.usage)
                && matchesFormality(other.formality);
        }

        /**
         * Utility methods for matching, taking into account that null matches anything
         */
        public boolean matchesOrder(Order otherOrder) {
            return order == null || otherOrder == null || order == otherOrder;
        }

        public boolean matchesFormality(final Formality otherFormality) {
            return formality == null || otherFormality == null || formality == otherFormality;
        }

        public boolean matchesUsage(final Usage otherUsage) {
            return usage == null || otherUsage == null || usage == otherUsage;
        }

        private boolean matchesLength(final Length otherLength) {
            return length == null || otherLength == null || length == otherLength;
        }

        /**
         * Create from the four values; any of them may be null, meaning "any value".
         */
        public FormatParameters(Order order, Length length, Usage usage, Formality formality) {
            this.order = order;
            this.length = length;
            this.usage = usage;
            this.formality = formality;
        }

        /**
         * Returns the non-null values in the form key='value', separated using JOIN_SPACE.
         */
        @Override
        public String toString() {
            List<String> items = new ArrayList<>();
            if (order != null) {
                items.add("order='" + order + "'");
            }
            if (length != null) {
                items.add("length='" + length + "'");
            }
            if (usage != null) {
                items.add("usage='" + usage + "'");
            }
            if (formality != null) {
                items.add("formality='" + formality + "'");
            }
            return JOIN_SPACE.join(items);
        }

        /**
         * Returns the first three characters of each non-null value, joined using JOIN_DASH.
         */
        public String abbreviated() {
            List<String> items = new ArrayList<>();
            if (order != null) {
                items.add(order.toString().substring(0,3));
            }
            if (length != null) {
                items.add(length.toString().substring(0,3));
            }
            if (usage != null) {
                items.add(usage.toString().substring(0,3));
            }
            if (formality != null) {
                items.add(formality.toString().substring(0,3));
            }
            return JOIN_DASH.join(items);
        }

        /**
         * Returns the non-null values, joined using JOIN_DASH.
         */
        public String dashed() {
            List<String> items = new ArrayList<>();
            if (order != null) {
                items.add(order.toString());
            }
            if (length != null) {
                items.add(length.toString());
            }
            if (usage != null) {
                items.add(usage.toString());
            }
            if (formality != null) {
                items.add(formality.toString());
            }
            return JOIN_DASH.join(items);
        }

        /**
         * Parse a string of key=value parts (split using SPLIT_SEMI, then SPLIT_EQUALS).
         * The keys are order, length, usage, and formality; keys that are not mentioned are left null.
         * @throws IllegalArgumentException if a part is not of the form key=value, or the key is unknown
         */
        public static FormatParameters from(String string) {
            Order order = null;
            Length length = null;
            Usage usage = null;
            Formality formality = null;
            for (String part : SPLIT_SEMI.split(string)) {
                List<String> parts = SPLIT_EQUALS.splitToList(part);
                if (parts.size() != 2) {
                    throw new IllegalArgumentException("must be of form length=medium; formality=… : " + string);
                }
                final String key = parts.get(0);
                final String value = parts.get(1);
                switch(key) {
                case "order":
                    order = Order.valueOf(value);
                    break;
                case "length":
                    length = Length.from(value);
                    break;
                case "usage":
                    usage = Usage.valueOf(value);
                    break;
                case "formality":
                    formality = Formality.valueOf(value);
                    break;
                default:
                    // A misspelled key would otherwise silently turn into a "match anything" value.
                    throw new IllegalArgumentException("Unknown key '" + key + "' in: " + string);
                }
            }
            return new FormatParameters(order, length, usage, formality);
        }

        // for thread-safe lazy evaluation
        private static class LazyEval {
            private static final ImmutableSet<FormatParameters> DATA;
            private static final ImmutableSet<FormatParameters> CLDR_DATA;
            static {
                Set<FormatParameters> _data = new LinkedHashSet<>();
                Set<FormatParameters> _cldrdata = new LinkedHashSet<>();
                for (Order order : Order.values()) {
                    for (Length length : Length.values()) {
                        if (order == Order.sorting) {
                            // for sorting, the CLDR set only has usage=referring
                            _cldrdata.add(new FormatParameters(order, length, Usage.referring, Formality.formal));
                            _cldrdata.add(new FormatParameters(order, length, Usage.referring, Formality.informal));
                        }
                        for (Formality formality : Formality.values()) {
                            for (Usage usage : Usage.values()) {
                                _data.add(new FormatParameters(order, length, usage, formality));
                                if (order != Order.sorting) {
                                    _cldrdata.add(new FormatParameters(order, length, usage, formality));
                                }
                            }
                        }
                    }
                }
                DATA = ImmutableSet.copyOf(_data);
                CLDR_DATA = ImmutableSet.copyOf(_cldrdata);
            }
        }

        /**
         * Returns all possible combinations of fields.
         * @return an immutable set with one entry for each combination of order, length, usage, and formality
         */
        public static ImmutableSet<FormatParameters> all() {
            return LazyEval.DATA;
        }

        /**
         * Returns all possible combinations of fields supported by CLDR
         * (the order=sorting combinations are abbreviated).
         * @return an immutable set of the combinations
         */
        public static ImmutableSet<FormatParameters> allCldr() {
            return LazyEval.CLDR_DATA;
        }

        /**
         * Compares by order, then length, then usage, then formality.
         * Null values are allowed, and sort after all non-null values,
         * so that in sorted data a "match anything" value does not come before (and mask) a specific one.
         */
        @Override
        public int compareTo(FormatParameters other) {
            return ComparisonChain.start()
                .compare(order, other.order, Comparator.nullsLast(Comparator.<Order>naturalOrder()))
                .compare(length, other.length, Comparator.nullsLast(Comparator.<Length>naturalOrder()))
                .compare(usage, other.usage, Comparator.nullsLast(Comparator.<Usage>naturalOrder()))
                .compare(formality, other.formality, Comparator.nullsLast(Comparator.<Formality>naturalOrder()))
                .result();
        }

        /**
         * Returns the non-null values separated by '-', or "any" if all of the values are null.
         */
        public String toLabel() {
            StringBuilder sb = new StringBuilder();
            addToLabel(order, sb);
            addToLabel(length, sb);
            addToLabel(usage, sb);
            addToLabel(formality, sb);
            return sb.length() == 0 ? "any" : sb.toString();
        }

        /**
         * Appends the item to sb if it is non-null, preceded by '-' unless sb is empty.
         */
        private <T> void addToLabel(T item, StringBuilder sb) {
            if (item != null) {
                if (sb.length() != 0) {
                    sb.append('-');
                }
                sb.append(item.toString());
            }
        }

        /**
         * Only used to add missing CLDR fields.
         * If an item is missing, get the best replacements.
         * @return progressively more general versions of this object, ending with one that matches anything
         */
        public Iterable<FormatParameters> getFallbacks() {
            return ImmutableList.of(
                new FormatParameters(order, length, null, formality),
                new FormatParameters(order, length, usage, null),
                new FormatParameters(order, length, null, null),
                new FormatParameters(order, null, null, null),
                new FormatParameters(null, null, null, null)
                );
        }

        /**
         * Two FormatParameters are equal if all four values are equal (including nulls).
         * Returns false for null and for objects of any other type.
         */
        @Override
        public boolean equals(Object obj) {
            if (this == obj) {
                return true;
            }
            if (!(obj instanceof FormatParameters)) {
                return false;
            }
            FormatParameters that = (FormatParameters) obj;
            return Objects.equals(order, that.order)
                && Objects.equals(length, that.length)
                && Objects.equals(usage, that.usage)
                && Objects.equals(formality, that.formality);
        }

        @Override
        public int hashCode() {
            return Objects.hash(order, length, usage, formality);
        }
    }


    /**
     * Returns a match for the nameFormatParameters, or null if the parameterMatcherToNamePattern has no match.
     * The keys are checked in the iteration order of the multimap, and the first one that matches wins.
     * The result is the collection supplied by the multimap's asMap() for that key, not a copy.
     */
    public static Collection<NamePattern> getBestMatchSet(
        ListMultimap<FormatParameters, NamePattern> parameterMatcherToNamePattern,
        FormatParameters nameFormatParameters) {
        for (Entry<FormatParameters, Collection<NamePattern>> parametersAndPatterns : parameterMatcherToNamePattern.asMap().entrySet()) {
            FormatParameters parameters = parametersAndPatterns.getKey();
            if (parameters.matches(nameFormatParameters)) {
                return parametersAndPatterns.getValue();
            }
        }
        return null; // This will only happen if the NamePatternData is incomplete
    }

    /**
     * Data that maps from NameFormatParameters and a NameObject to the best NamePattern.
     * It must be complete: that is, it must match every possible value.
     * Immutable
     * @internal
     * NOTE: CLDR needs access to this.
     */
    public static class NamePatternData {
        private final ImmutableMap<ULocale, Order> localeToOrder;
        private final ImmutableListMultimap<FormatParameters, NamePattern> parameterMatcherToNamePattern;

        /**
         * Returns the best pattern for the name and parameters.
         * If the parameters have no order, the order is taken from localeToOrder for the name's locale,
         * or is givenFirst if that locale has no entry.
         * Among the patterns for the first matching key, the one sharing the most fields with the name wins;
         * on ties, the one with the fewest fields wins, and then the earliest.
         * @throws IllegalArgumentException if no key matches the parameters
         */
        public NamePattern getBestMatch(NameObject nameObject, FormatParameters nameFormatParameters) {
            if (nameFormatParameters.order == null) {
                final Order mappedOrder = localeToOrder.get(nameObject.getNameLocale());
                nameFormatParameters = nameFormatParameters.setOrder(mappedOrder == null ? Order.givenFirst : mappedOrder);
            }

            NamePattern result = null;

            Collection<NamePattern> namePatterns = getBestMatchSet(parameterMatcherToNamePattern, nameFormatParameters);
            if (namePatterns == null) {
                // Internal error, should never happen with valid data
                throw new IllegalArgumentException("Can't find " + nameFormatParameters + " in " + parameterMatcherToNamePattern);
            }
            Set<Field> nameFields = nameObject.getAvailableFields();
            int bestMatchSize = -1;

            for (NamePattern pattern : namePatterns) {
                Set<Field> patternFields = pattern.getFields();

                int matchSize = getIntersectionSize(nameFields, patternFields);

                // result is never null in the second test: the first pattern always satisfies the first test,
                // since matchSize >= 0 > -1
                if ((matchSize > bestMatchSize) /* better match */
                    || (matchSize == bestMatchSize
                    && patternFields.size() < result.getFieldsSize()) /* equal match, but less "extra" fields */) {
                    bestMatchSize = matchSize;
                    result = pattern;
                }
            }

            return result;
        }

        /**
         * Build the name pattern data. In the formatParametersToNamePattern:
         * <ul>
         * <li>Every possible FormatParameters value must match at least one FormatParameters</li>
         * <li>No FormatParameters is superfluous; the ones before it must not mask it.</li>
         * </ul>
         * The multimap values must retain the order they are built with!
         * @param localeToOrder map from name locale to order; null is treated as empty
         * @throws IllegalArgumentException if formatParametersToNamePattern is null or empty,
         * or fails either of the conditions above
         */
        public NamePatternData(ImmutableMap<ULocale, Order> localeToOrder,
            ListMultimap<FormatParameters, NamePattern> formatParametersToNamePattern) {

            if (formatParametersToNamePattern == null || formatParametersToNamePattern.isEmpty()) {
                throw new IllegalArgumentException("formatParametersToNamePattern must be non-null, non-empty");
            }

            this.localeToOrder = localeToOrder == null ? ImmutableMap.of() : localeToOrder;

            Set<FormatParameters> remaining = new LinkedHashSet<>(FormatParameters.all());

            // check that parameters are complete, and that nothing is masked by anything previous

            for (Entry<FormatParameters, Collection<NamePattern>> entry : formatParametersToNamePattern.asMap().entrySet()) {
                FormatParameters key = entry.getKey();
                Collection<NamePattern> values = entry.getValue();

                // TODO Mark No FormatParameters should be completely masked by any previous ones

                // The following code starts with a list of all the items, and removes any that match
                int matchCount = 0;
                for (Iterator<FormatParameters> rest = remaining.iterator(); rest.hasNext(); ) {
                    FormatParameters item = rest.next();
                    if (key.matches(item)) {
                        rest.remove();
                        ++matchCount;
                        if (DEBUG) {
                            System.out.println(" * " + item + " matches " + key);
                        }
                    }
                }
                if (matchCount == 0) {
                    throw new IllegalArgumentException("key is masked by previous values: " + key
                        + ",\n\t" + JOIN_LFTB.join(formatParametersToNamePattern.entries()));
                }

                // Each entry in FormatParameters must have at least one NamePattern
                if (values.isEmpty()) {
                    throw new IllegalArgumentException("key has no values: " + key);
                }
            }
            if (!remaining.isEmpty()) {
                throw new IllegalArgumentException("values are not complete; they don't match:\n\t"
                    + JOIN_LFTB.join(remaining));
            }
            this.parameterMatcherToNamePattern = ImmutableListMultimap.copyOf(formatParametersToNamePattern);
        }

        /**
         * Returns the map from name locale to order (never null; may be empty).
         */
        public Map<ULocale, Order> getLocaleToOrder() {
            return localeToOrder;
        }

        /**
         * Build from strings for ease of testing.
         * The strings alternate between a FormatParameters string and a NamePattern string.
         */
        public NamePatternData(ImmutableMap<ULocale, Order> localeToOrder, String...formatParametersToNamePatterns ) {
            this(localeToOrder, parseFormatParametersToNamePatterns(formatParametersToNamePatterns));
        }

        /**
         * Parses alternating FormatParameters and NamePattern strings into a multimap, in the order supplied,
         * and then adds any missing items using addMissing.
         * @throws IllegalArgumentException if the number of strings is odd
         */
        private static ListMultimap<FormatParameters, NamePattern> parseFormatParametersToNamePatterns(String... formatParametersToNamePatterns) {
            int count = formatParametersToNamePatterns.length;
            if ((count % 2) != 0) {
                throw new IllegalArgumentException("Must have even number of strings, fields => pattern: " + Arrays.asList(formatParametersToNamePatterns));
            }
            ListMultimap<FormatParameters, NamePattern> _formatParametersToNamePatterns = LinkedListMultimap.create();
            int rank = 0;
            for (int i = 0; i < count; i += 2) {
                FormatParameters pm = FormatParameters.from(formatParametersToNamePatterns[i]);
                NamePattern np = NamePattern.from(rank++, formatParametersToNamePatterns[i+1]);
                _formatParametersToNamePatterns.put(pm, np);
            }
            addMissing(_formatParametersToNamePatterns);

            return _formatParametersToNamePatterns;
        }

        @Override
        public String toString() {
            return "{" + (localeToOrder.isEmpty() ? "" : "localeToOrder=" + localeToOrder + "\n\t\t")
                + show(parameterMatcherToNamePattern) + "}";
        }

        /**
         * Returns the multimap as a string, with a line break after each key's list of patterns.
         */
        private String show(ImmutableListMultimap<FormatParameters, NamePattern> multimap) {
            String result = multimap.asMap().toString();
            return result.replace("], ", "],\n\t\t\t"); // for readability
        }

        /**
         * For testing
         * @internal
         */
        public ImmutableListMultimap<FormatParameters, NamePattern> getMatcherToPatterns() {
            return parameterMatcherToNamePattern;
        }
    }

    /**
     * Interface used by the person name formatter to access name field values.
     * It provides access not only to values for modified fields directly supported by the NameObject,
     * but also to values that may be produced or modified by the Name Object.
     */
    public static interface NameObject {
        /**
         * Returns the locale of the name, or null if not available.
         * NOTE: this is not the same as the locale of the person name formatter.
         */
        public ULocale getNameLocale();
        /**
         * Returns a mapping for the modified fields directly supported to their values.
         */
        public ImmutableMap<ModifiedField, String> getModifiedFieldToValue();
        /**
         * Returns the set of fields directly supported. Should be overridden for speed.
         * It returns the same value as getModifiedFieldToValue().keySet().stream().map(x -> x.field).collect(Collectors.toSet()),
         * but may be optimized.
         */
        public Set<Field> getAvailableFields();
        /**
         * Returns the best available value for the modified field, or null if nothing is available.
         * Null is returned in all and only those cases where !getAvailableFields().contains(modifiedField.field)
         * @param modifiedField the input modified field, for which the best value is fetched.
         * @param remainingModifers contains the set of modifiers that were not handled by this method.
         * The calling code may apply fallback algorithms based on these values.
         * @return the best available value, or null
         */
        public String getBestValue(ModifiedField modifiedField, Set<Modifier> remainingModifers);
    }

    private final NamePatternData namePatternMap;
    private final FallbackFormatter fallbackFormatter;

    @Override
    public String toString() {
        return namePatternMap.toString();
    }

    /**
     * @internal
     */
    public final NamePatternData getNamePatternData() {
        return namePatternMap;
    }

    /**
     * Create a formatter directly from data.
     * NOTE CLDR will need to have access to this creation method.
     * @internal
     */
    public PersonNameFormatter(NamePatternData namePatternMap, FallbackFormatter fallbackFormatter) {
        this.namePatternMap = namePatternMap;
        this.fallbackFormatter = fallbackFormatter;
    }

    /**
     * Create a formatter from a CLDR file.
     * Reads the paths starting with //ldml/personNames (except aliases).
     * @throws IllegalArgumentException for a duplicate personName path/value, or an unexpected path
     * @internal
     */
    public PersonNameFormatter(CLDRFile cldrFile) {
        ListMultimap<FormatParameters, NamePattern> formatParametersToNamePattern = LinkedListMultimap.create();
        Set<Pair<FormatParameters, NamePattern>> ordered = new TreeSet<>();
        String initialPattern = null;
        String initialSequencePattern = null;
        String foreignSpaceReplacement = null;
        Map<ULocale, Order> _localeToOrder = new TreeMap<>();

        // read out the data and order it properly
        for (String path : cldrFile) {
            if (path.startsWith("//ldml/personNames") && !path.endsWith("/alias")) {
                String value = cldrFile.getStringValue(path);
                XPathParts parts = XPathParts.getFrozenInstance(path);
                switch(parts.getElement(2)) {
                case "personName":
                    Pair<FormatParameters, NamePattern> pair = fromPathValue(parts, value);
                    boolean added = ordered.add(pair);
                    if (!added) {
                        throw new IllegalArgumentException("Duplicate path/value " + pair);
                    }
                    break;
                case "initialPattern":
                    //ldml/personNames/initialPattern[@type="initial"]
                    String type = parts.getAttributeValue(-1, "type");
                    switch(type) {
                    case "initial": initialPattern = value; break;
                    case "initialSequence": initialSequencePattern = value; break;
                    default: throw new IllegalArgumentException("Unexpected path: " + path);
                    }
                    break;
                case "nameOrderLocales":
                    //ldml/personNames/nameOrderLocales[@order="givenFirst"], value = list of locales
                    for (String locale : SPLIT_SPACE.split(value)) {
                        Order order = Order.valueOf(parts.getAttributeValue(-1, "order"));
                        _localeToOrder.put(new ULocale(locale), order);
                    }
                    break;
                case "foreignSpaceReplacement":
                    foreignSpaceReplacement = value;
                    break;
                case "sampleName":
                    // skip
                    break;
                default: throw new IllegalArgumentException("Unexpected path: " + path);
                }
            }
        }
        for (Pair<FormatParameters, NamePattern> entry : ordered) {
            formatParametersToNamePattern.put(entry.getFirst(), entry.getSecond());
        }
        addMissing(formatParametersToNamePattern);

        ImmutableMap<ULocale, Order> localeToOrder = ImmutableMap.copyOf(_localeToOrder);
        this.namePatternMap = new NamePatternData(localeToOrder, formatParametersToNamePattern);
        this.fallbackFormatter = new FallbackFormatter(new ULocale(cldrFile.getLocaleID()),
            initialPattern, initialSequencePattern, foreignSpaceReplacement, false);
    }

    /**
     * Add items that are not in the pattern, using the fallbacks.
     * TODO: can generalize; if we have order=x ... formality=y,
     * and a later value that matches except with formality=null,
     * and nothing in between matches, can drop the first
     * @throws IllegalArgumentException if some combination has no match, even using the fallbacks
     */
    private static void addMissing(ListMultimap<FormatParameters, NamePattern> formatParametersToNamePattern) {
        for (FormatParameters formatParameters : FormatParameters.all()) {
            Collection<NamePattern> namePatterns = getBestMatchSet(formatParametersToNamePattern, formatParameters);
            if (namePatterns == null) {
                for (FormatParameters fallback : formatParameters.getFallbacks()) {
                    namePatterns = getBestMatchSet(formatParametersToNamePattern, fallback);
                    if (namePatterns != null) {
                        // getBestMatchSet hands back a collection obtained from this same multimap's asMap(),
                        // so take a snapshot rather than reading from the multimap while adding to it.
                        formatParametersToNamePattern.putAll(fallback, ImmutableList.copyOf(namePatterns));
                        break;
                    }
                }
                if (namePatterns == null) {
                    throw new IllegalArgumentException("Missing fallback for " + formatParameters);
                }
            }
        }
    }

    /**
     * Main function for formatting names.
     * @param nameObject — A name object, which supplies data.
     * @param nameFormatParameters - The specification of which parameters are desired.
     * @return formatted string
     * TODO make most public methods be @internal (public but just for testing).
     * The NameObject and FormatParameters are exceptions.
     * TODO decide how to allow clients to customize data in the name object. Options:
     * a. Leave it to implementers (eg they can write a FilteredNameObject that changes some fields).
     * b. Pass in explicit override parameters, like whether to uppercase the surname in surnameFirst.
     * TODO decide whether/how to allow clients to customize the built-in data (namePatternData, fallbackFormatter)
     * a. CLDR will need to be able to customize it completely.
     * b. Clients may want to set the contextual uppercasing of surnames, the handling of which locales cause surnameFirst, etc.
     */
    public String format(NameObject nameObject, FormatParameters nameFormatParameters) {
        // look through the namePatternMap to find the best match for the set of modifiers and the available nameObject fields
        NamePattern bestPattern = namePatternMap.getBestMatch(nameObject, nameFormatParameters);
        // then format using it
        return bestPattern.format(nameObject, nameFormatParameters, fallbackFormatter);
    }

    /**
     * For testing
     * @internal
     */
    public Collection<NamePattern> getBestMatchSet(FormatParameters nameFormatParameters) {
        return getBestMatchSet(namePatternMap.parameterMatcherToNamePattern, nameFormatParameters);
    }

    /**
     * Utility for constructing data from path and value.
     * The rank of the pattern is the value of the alt attribute, or 0 if there is none.
     * @throws IllegalArgumentException if the resulting pattern is blank
     * @internal
     */
    public static Pair<FormatParameters, NamePattern> fromPathValue(XPathParts parts, String value) {
        //ldml/personNames/personName[@length="long"][@usage="referring"][@order="sorting"]/namePattern[alt="2"]
        // value = {surname}, {given} {given2} {suffix}
        final String altValue = parts.getAttributeValue(-1, "alt");
        int rank = altValue == null ? 0 : Integer.parseInt(altValue);
        FormatParameters pm = new FormatParameters(
            Order.from(parts.getAttributeValue(-2, "order")),
            Length.from(parts.getAttributeValue(-2, "length")),
            Usage.from(parts.getAttributeValue(-2, "usage")),
            Formality.from(parts.getAttributeValue(-2, "formality"))
            );

        NamePattern np = NamePattern.from(rank, value);
        if (np.toString().isBlank()) {
            throw new IllegalArgumentException("No empty patterns allowed: " + pm);
        }
        return Pair.of(pm, np);
    }

    /**
     * Special data for vetters, to show how foreign names would format
     */
    private static final Map<String, SimpleNameObject> FOREIGN_NAME_FOR_NON_SPACING;
    static {
        // code given surname
        final String[][] specials = {
            {"th", "อัลเบิร์ต", "ไอน์สไตน์"},
            {"my", "အဲလ်ဘတ်", "အိုင်းစတိုင်း"},
            {"km", "អាល់បឺត", "អែងស្តែង"},
            {"ko", "알베르트", "아인슈타인"},
            {"ja", "アルベルト", "アインシュタイン"},
            {"zh", "阿尔伯特", "爱因斯坦"}
        };
        Map<String, SimpleNameObject> temp = Maps.newLinkedHashMap();
        for (String[] row : specials) {
            temp.put(row[0], specialNameOf(row));
        }
        FOREIGN_NAME_FOR_NON_SPACING = ImmutableMap.copyOf(temp);
    }

    /**
     * Builds a name object from a row of the form {code, given, surname}.
     * The name locale is always de_CH; the code in row[0] is not used here.
     */
    private static SimpleNameObject specialNameOf(String[] row) {
        return new SimpleNameObject(new ULocale("de_CH"), ImmutableMap.of(
            ModifiedField.from("given"), row[1],
            ModifiedField.from("surname"), row[2]
            ));
    }

    /**
     * Utility for getting sample names. DOES NOT CACHE
     * @param cldrFile the file to read the //ldml/personNames/sampleName paths from
     * @return an immutable map from sample type to name; for the languages in FOREIGN_NAME_FOR_NON_SPACING,
     * the SampleType.foreign entry is set to the special foreign name
     * @internal
     */
    public static Map<SampleType, SimpleNameObject> loadSampleNames(CLDRFile cldrFile) {
        M3<SampleType, ModifiedField, String> names = ChainedMap.of(new TreeMap<SampleType, Object>(), new TreeMap<ModifiedField, Object>(), String.class);
        for (String path : cldrFile) {
            if (path.startsWith("//ldml/personNames/sampleName")) {
                //ldml/personNames/sampleName[@item="full"]/nameField[@type="prefix"]
                String value = cldrFile.getStringValue(path);
                if (value != null && !value.equals("∅∅∅")) {
                    XPathParts parts = XPathParts.getFrozenInstance(path);
                    names.put(SampleType.valueOf(parts.getAttributeValue(-2, "item")), ModifiedField.from(parts.getAttributeValue(-1, "type")), value);
                }
            }
        }

        Map<SampleType, SimpleNameObject> result = new TreeMap<>();
        for (Entry<SampleType, Map<ModifiedField, String>> entry : names) {
            SimpleNameObject name = new SimpleNameObject(new ULocale(cldrFile.getLocaleID()), entry.getValue());
            result.put(entry.getKey(), name);
        }

        // add special foreign name for non-spacing languages
        LanguageTagParser ltp = new LanguageTagParser();
        SimpleNameObject extraName = FOREIGN_NAME_FOR_NON_SPACING.get(ltp.set(cldrFile.getLocaleID()).getLanguageScript());
        if (extraName != null) {
            result.put(SampleType.foreign, extraName);
        }
        return ImmutableMap.copyOf(result);
    }

    /**
     * General Utility
     * Avoids object creation in Sets.intersection(a,b).size()
     * @return the number of elements of set1 that are also contained in set2
     */
    public static <T> int getIntersectionSize(Set<T> set1, Set<T> set2) {
        int size = 0;
        for (T e : set1) {
            if (set2.contains(e)) {
                size++;
            }
        }
        return size;
    }

    private static final CaseMap.Title TO_TITLE_WHOLE_STRING_NO_LOWERCASE =
        CaseMap.toTitle().wholeString().noLowercase();

    /**
     * Returns the fallback formatter that this formatter was created with.
     */
    public FallbackFormatter getFallbackInfo() {
        return fallbackFormatter;
    }
}