1 // © 2022 and later: Unicode, Inc. and others. 2 // License & terms of use: http://www.unicode.org/copyright.html 3 package com.ibm.icu.impl.personname; 4 5 import static com.ibm.icu.util.UResourceBundle.ARRAY; 6 import static com.ibm.icu.util.UResourceBundle.STRING; 7 8 import java.util.Arrays; 9 import java.util.Collections; 10 import java.util.HashSet; 11 import java.util.Locale; 12 import java.util.Set; 13 14 import com.ibm.icu.impl.ICUData; 15 import com.ibm.icu.impl.ICUResourceBundle; 16 import com.ibm.icu.lang.UScript; 17 import com.ibm.icu.text.PersonName; 18 import com.ibm.icu.text.PersonNameFormatter; 19 import com.ibm.icu.util.ULocale; 20 import com.ibm.icu.util.UResourceBundle; 21 22 /** 23 * Actual implementation class for PersonNameFormatter. 24 */ 25 public class PersonNameFormatterImpl { 26 private final Locale locale; 27 private final PersonNamePattern[] gnFirstPatterns; 28 private final PersonNamePattern[] snFirstPatterns; 29 private final Set<String> gnFirstLocales; 30 private final Set<String> snFirstLocales; 31 private final String initialPattern; 32 private final String initialSequencePattern; 33 private final boolean capitalizeSurname; 34 private final String foreignSpaceReplacement; 35 private final boolean formatterLocaleUsesSpaces; 36 private final PersonNameFormatter.Length length; 37 private final PersonNameFormatter.Usage usage; 38 private final PersonNameFormatter.Formality formality; 39 private final Set<PersonNameFormatter.Options> options; 40 PersonNameFormatterImpl(Locale locale, PersonNameFormatter.Length length, PersonNameFormatter.Usage usage, PersonNameFormatter.Formality formality, Set<PersonNameFormatter.Options> options)41 public PersonNameFormatterImpl(Locale locale, 42 PersonNameFormatter.Length length, 43 PersonNameFormatter.Usage usage, 44 PersonNameFormatter.Formality formality, 45 Set<PersonNameFormatter.Options> options) { 46 // null for `options` is the same as the empty set 47 if (options == null) { 48 options = new HashSet<>(); 49 } 50 51 // save off our creation parameters (these are only used if we have to create a second formatter) 52 this.length = length; 53 this.usage = usage; 54 this.formality = formality; 55 this.options = options; 56 57 // load simple property values from the resource bundle (or the options set) 58 ICUResourceBundle rb = (ICUResourceBundle)UResourceBundle.getBundleInstance(ICUData.ICU_BASE_NAME, locale); 59 this.locale = locale; 60 this.initialPattern = rb.getStringWithFallback("personNames/initialPattern/initial"); 61 this.initialSequencePattern = rb.getStringWithFallback("personNames/initialPattern/initialSequence"); 62 this.capitalizeSurname = options.contains(PersonNameFormatter.Options.SURNAME_ALLCAPS); 63 this.foreignSpaceReplacement = rb.getStringWithFallback("personNames/foreignSpaceReplacement"); 64 this.formatterLocaleUsesSpaces = !LOCALES_THAT_DONT_USE_SPACES.contains(locale.getLanguage()); 65 66 // asjust for combinations of parameters that don't make sense in practice 67 if (usage == PersonNameFormatter.Usage.MONOGRAM) { 68 // we don't support SORTING in conjunction with MONOGRAM; if the caller passes in SORTING, remove it from 69 // the options list 70 options.remove(PersonNameFormatter.Options.SORTING); 71 } else if (options.contains(PersonNameFormatter.Options.SORTING)) { 72 // we only support SORTING in conjunction with REFERRING; if the caller passes in ADDRESSING, treat it 73 // the same as REFERRING 74 usage = PersonNameFormatter.Usage.REFERRING; 75 } 76 77 // load the actual formatting patterns-- since we don't know the name order until formatting time (it can be 78 // different for different names), load patterns for both given-first and surname-first names. (If the user has 79 // specified SORTING, we don't need to do this-- we just load the "sorting" patterns and ignore the name's order.) 80 final String RESOURCE_PATH_PREFIX = "personNames/namePattern/"; 81 String resourceNameBody = length.toString().toLowerCase() + "-" + usage.toString().toLowerCase() + "-" 82 + formality.toString().toLowerCase(); 83 if (!options.contains(PersonNameFormatter.Options.SORTING)) { 84 ICUResourceBundle gnFirstResource = rb.getWithFallback(RESOURCE_PATH_PREFIX + "givenFirst-" + resourceNameBody); 85 ICUResourceBundle snFirstResource = rb.getWithFallback(RESOURCE_PATH_PREFIX + "surnameFirst-" + resourceNameBody); 86 87 gnFirstPatterns = PersonNamePattern.makePatterns(asStringArray(gnFirstResource), this); 88 snFirstPatterns = PersonNamePattern.makePatterns(asStringArray(snFirstResource), this); 89 90 gnFirstLocales = new HashSet<>(); 91 Collections.addAll(gnFirstLocales, asStringArray(rb.getWithFallback("personNames/nameOrderLocales/givenFirst"))); 92 snFirstLocales = new HashSet<>(); 93 Collections.addAll(snFirstLocales, asStringArray(rb.getWithFallback("personNames/nameOrderLocales/surnameFirst"))); 94 } else { 95 ICUResourceBundle patternResource = rb.getWithFallback(RESOURCE_PATH_PREFIX + "sorting-" + resourceNameBody); 96 97 gnFirstPatterns = PersonNamePattern.makePatterns(asStringArray(patternResource), this); 98 snFirstPatterns = null; 99 gnFirstLocales = null; 100 snFirstLocales = null; 101 } 102 } 103 104 /** 105 * THIS IS A DUMMY CONSTRUCTOR JUST FOR THE USE OF THE UNIT TESTS TO CHECK SOME OF THE INTERNAL IMPLEMENTATION! 106 */ PersonNameFormatterImpl(Locale locale, String[] patterns)107 public PersonNameFormatterImpl(Locale locale, String[] patterns) { 108 // first, set dummy values for the other fields 109 snFirstPatterns = null; 110 gnFirstLocales = null; 111 snFirstLocales = null; 112 length = PersonNameFormatter.Length.MEDIUM; 113 usage = PersonNameFormatter.Usage.REFERRING; 114 formality = PersonNameFormatter.Formality.FORMAL; 115 options = Collections.emptySet(); 116 initialPattern = "{0}."; 117 initialSequencePattern = "{0} {1}"; 118 capitalizeSurname = false; 119 foreignSpaceReplacement = " "; 120 formatterLocaleUsesSpaces = true; 121 122 // then, set values for the fields we actually care about 123 this.locale = locale; 124 gnFirstPatterns = PersonNamePattern.makePatterns(patterns, this); 125 126 } 127 formatToString(PersonName name)128 public String formatToString(PersonName name) { 129 // TODO: Should probably return a FormattedPersonName object 130 131 // if the formatter is for a language that doesn't use spaces between words and the name is from a language 132 // that does, create a formatter for the NAME'S locale and use THAT to format the name 133 Locale nameLocale = getNameLocale(name); 134 boolean nameLocaleUsesSpaces = !LOCALES_THAT_DONT_USE_SPACES.contains(nameLocale.getLanguage()); 135 if (!formatterLocaleUsesSpaces && nameLocaleUsesSpaces) { 136 PersonNameFormatterImpl nativeFormatter = new PersonNameFormatterImpl(nameLocale, this.length, 137 this.usage, this.formality, this.options); 138 String result = nativeFormatter.formatToString(name); 139 140 // BUT, if the name is actually written in the formatter locale's script, replace any spaces in the name 141 // with the foreignSpaceReplacement character 142 if (!foreignSpaceReplacement.equals(" ") && scriptMatchesLocale(result, this.locale)) { 143 result = result.replace(" ", this.foreignSpaceReplacement); 144 } 145 return result; 146 } 147 148 // if we get down to here, we're just doing normal formatting-- if we have both given-first and surname-first 149 // rules, choose which one to use based on the name's locale and preferred field order 150 if (snFirstPatterns == null || nameIsGnFirst(name)) { 151 return getBestPattern(gnFirstPatterns, name).format(name); 152 } else { 153 return getBestPattern(snFirstPatterns, name).format(name); 154 } 155 } 156 getLocale()157 public Locale getLocale() { 158 return locale; 159 } 160 getLength()161 public PersonNameFormatter.Length getLength() { return length; } 162 getUsage()163 public PersonNameFormatter.Usage getUsage() { return usage; } 164 getFormality()165 public PersonNameFormatter.Formality getFormality() { return formality; } 166 getOptions()167 public Set<PersonNameFormatter.Options> getOptions() { return options; } 168 getInitialPattern()169 public String getInitialPattern() { 170 return initialPattern; 171 } 172 getInitialSequencePattern()173 public String getInitialSequencePattern() { 174 return initialSequencePattern; 175 } 176 shouldCapitalizeSurname()177 public boolean shouldCapitalizeSurname() { 178 return capitalizeSurname; 179 } 180 181 private final Set<String> LOCALES_THAT_DONT_USE_SPACES = new HashSet<>(Arrays.asList("ja", "zh", "th", "yue", "km", "lo")); 182 183 /** 184 * Returns the value of the resource, as a string array. 185 * @param resource An ICUResourceBundle of type STRING or ARRAY. If ARRAY, this function just returns it 186 * as a string array. If STRING, it returns a one-element array containing that string. 187 * @return The resource's value, as an array of Strings. 188 */ asStringArray(ICUResourceBundle resource)189 private String[] asStringArray(ICUResourceBundle resource) { 190 if (resource.getType() == STRING) { 191 return new String[] { resource.getString() }; 192 } else if (resource.getType() == ARRAY){ 193 return resource.getStringArray(); 194 } else { 195 throw new IllegalStateException("Unsupported resource type " + resource.getType()); 196 } 197 } 198 199 /** 200 * Returns the field order to use when formatting this name, taking into account the name's preferredOrder 201 * field, as well as the name and formatter's respective locales. 202 * @param name The name to be formatted. 203 * @return If true, use given-first order to format the name; if false, use surname-first order. 204 */ nameIsGnFirst(PersonName name)205 private boolean nameIsGnFirst(PersonName name) { 206 // the name can declare its order-- check that first (it overrides any locale-based calculation) 207 Set<PersonName.FieldModifier> modifiers = new HashSet<>(); 208 String preferredOrder = name.getFieldValue(PersonName.NameField.PREFERRED_ORDER, modifiers); 209 if (preferredOrder != null) { 210 if (preferredOrder.equals("givenFirst")) { 211 return true; 212 } else if (preferredOrder.equals("surnameFirst")) { 213 return false; 214 } else { 215 throw new IllegalArgumentException("Illegal preferredOrder value " + preferredOrder); 216 } 217 } 218 219 String localeStr = getNameLocale(name).toString(); 220 do { 221 if (gnFirstLocales.contains(localeStr)) { 222 return true; 223 } else if (snFirstLocales.contains(localeStr)) { 224 return false; 225 } 226 227 int lastUnderbarPos = localeStr.lastIndexOf("_"); 228 if (lastUnderbarPos >= 0) { 229 localeStr = localeStr.substring(0, lastUnderbarPos); 230 } else { 231 localeStr = "root"; 232 } 233 } while (!localeStr.equals("root")); 234 235 // should never get here-- "root" should always be in one of the locales 236 return true; 237 } 238 getBestPattern(PersonNamePattern[] patterns, PersonName name)239 private PersonNamePattern getBestPattern(PersonNamePattern[] patterns, PersonName name) { 240 // early out if there's only one pattern 241 if (patterns.length == 1) { 242 return patterns[0]; 243 } else { 244 // if there's more than one pattern, return the one that contains the greatest number of fields that 245 // actually have values in `name`. If there's a tie, return the pattern that contains the lowest number 246 // of fields that DON'T have values in `name`. 247 int maxPopulatedFields = 0; 248 int minEmptyFields = Integer.MAX_VALUE; 249 PersonNamePattern bestPattern = null; 250 251 for (PersonNamePattern pattern : patterns) { 252 int populatedFields = pattern.numPopulatedFields(name); 253 int emptyFields = pattern.numEmptyFields(name); 254 if (populatedFields > maxPopulatedFields) { 255 maxPopulatedFields = populatedFields; 256 minEmptyFields = emptyFields; 257 bestPattern = pattern; 258 } else if (populatedFields == maxPopulatedFields && emptyFields < minEmptyFields) { 259 minEmptyFields = emptyFields; 260 bestPattern = pattern; 261 } 262 } 263 return bestPattern; 264 } 265 } 266 267 /** 268 * Internal function to figure out the name's locale when the name doesn't specify it. 269 * (Note that this code assumes that if the locale is specified, it includes a language 270 * code.) 271 * @param name The name for which we need the locale 272 * @return The name's (real or guessed) locale. 273 */ getNameLocale(PersonName name)274 private Locale getNameLocale(PersonName name) { 275 // if the name specifies its locale, we can just return it 276 Locale nameLocale = name.getNameLocale(); 277 if (nameLocale == null) { 278 // if not, we look at the characters in the name. If their script matches the default script for the formatter's 279 // locale, we use the formatter's locale as the name's locale 280 int formatterScript = UScript.getCodeFromName(ULocale.addLikelySubtags(ULocale.forLocale(locale)).getScript()); 281 String givenName = name.getFieldValue(PersonName.NameField.GIVEN, new HashSet<PersonName.FieldModifier>()); 282 int nameScript = UScript.INVALID_CODE; 283 for (int i = 0; nameScript == UScript.INVALID_CODE && i < givenName.length(); i++) { 284 // the script of the name is the script of the first character in the name whose script isn't 285 // COMMON or INHERITED 286 int script = UScript.getScript(givenName.charAt(i)); 287 if (script != UScript.COMMON && script != UScript.INHERITED) { 288 nameScript = script; 289 } 290 } 291 if (formatterScript == nameScript) { 292 nameLocale = this.locale; 293 } else { 294 // if the name's script is different from the formatter's script, we use addLikelySubtags() to find the 295 // default language for the name's script and use THAT as the name's locale 296 nameLocale = new Locale(ULocale.addLikelySubtags(new ULocale("und_" + UScript.getShortName(nameScript))).getLanguage()); 297 } 298 // TODO: This algorithm has a few deficiencies: First, it assumes the script of the string is the script of the first 299 // character in the string that's not COMMON or INHERITED. This won't work well for some languages, such as Japanese, 300 // that use multiple scripts. Doing better would require adding a new getScript(String) method on UScript, which 301 // might be something we want. Second, we only look at the given-name field. This field should always be populated, 302 // but if it isn't, we're stuck. Looking at all the fields requires API on PersonName that we don't need anywhere 303 // else. 304 } 305 return nameLocale; 306 } 307 308 /** 309 * Returns true if the script of `s` is one of the default scripts for `locale`. 310 * This function only checks the script of the first character whose script isn't "common," 311 * so it probably won't work right on mixed-script strings. 312 */ scriptMatchesLocale(String s, Locale locale)313 private boolean scriptMatchesLocale(String s, Locale locale) { 314 int[] localeScripts = UScript.getCode(locale); 315 int stringScript = UScript.COMMON; 316 for (int i = 0; stringScript == UScript.COMMON && i < s.length(); i++) { 317 char c = s.charAt(i); 318 stringScript = UScript.getScript(c); 319 } 320 321 for (int localeScript : localeScripts) { 322 if (localeScript == stringScript) { 323 return true; 324 } 325 } 326 return false; 327 } 328 } 329