• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // © 2022 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 package com.ibm.icu.impl.personname;
4 
5 import static com.ibm.icu.util.UResourceBundle.ARRAY;
6 import static com.ibm.icu.util.UResourceBundle.STRING;
7 
8 import java.util.Arrays;
9 import java.util.Collections;
10 import java.util.HashSet;
11 import java.util.Locale;
12 import java.util.Set;
13 
14 import com.ibm.icu.impl.ICUData;
15 import com.ibm.icu.impl.ICUResourceBundle;
16 import com.ibm.icu.lang.UScript;
17 import com.ibm.icu.text.PersonName;
18 import com.ibm.icu.text.PersonNameFormatter;
19 import com.ibm.icu.util.ULocale;
20 import com.ibm.icu.util.UResourceBundle;
21 
22 /**
23  * Actual implementation class for PersonNameFormatter.
24  */
25 public class PersonNameFormatterImpl {
26     private final Locale locale;
27     private final PersonNamePattern[] gnFirstPatterns;
28     private final PersonNamePattern[] snFirstPatterns;
29     private final Set<String> gnFirstLocales;
30     private final Set<String> snFirstLocales;
31     private final String initialPattern;
32     private final String initialSequencePattern;
33     private final boolean capitalizeSurname;
34     private final String foreignSpaceReplacement;
35     private final boolean formatterLocaleUsesSpaces;
36     private final PersonNameFormatter.Length length;
37     private final PersonNameFormatter.Usage usage;
38     private final PersonNameFormatter.Formality formality;
39     private final Set<PersonNameFormatter.Options> options;
40 
PersonNameFormatterImpl(Locale locale, PersonNameFormatter.Length length, PersonNameFormatter.Usage usage, PersonNameFormatter.Formality formality, Set<PersonNameFormatter.Options> options)41     public PersonNameFormatterImpl(Locale locale,
42                                    PersonNameFormatter.Length length,
43                                    PersonNameFormatter.Usage usage,
44                                    PersonNameFormatter.Formality formality,
45                                    Set<PersonNameFormatter.Options> options) {
46         // null for `options` is the same as the empty set
47         if (options == null) {
48             options = new HashSet<>();
49         }
50 
51         // save off our creation parameters (these are only used if we have to create a second formatter)
52         this.length = length;
53         this.usage = usage;
54         this.formality = formality;
55         this.options = options;
56 
57         // load simple property values from the resource bundle (or the options set)
58         ICUResourceBundle rb = (ICUResourceBundle)UResourceBundle.getBundleInstance(ICUData.ICU_BASE_NAME, locale);
59         this.locale = locale;
60         this.initialPattern = rb.getStringWithFallback("personNames/initialPattern/initial");
61         this.initialSequencePattern = rb.getStringWithFallback("personNames/initialPattern/initialSequence");
62         this.capitalizeSurname = options.contains(PersonNameFormatter.Options.SURNAME_ALLCAPS);
63         this.foreignSpaceReplacement = rb.getStringWithFallback("personNames/foreignSpaceReplacement");
64         this.formatterLocaleUsesSpaces = !LOCALES_THAT_DONT_USE_SPACES.contains(locale.getLanguage());
65 
66         // asjust for combinations of parameters that don't make sense in practice
67         if (usage == PersonNameFormatter.Usage.MONOGRAM) {
68             // we don't support SORTING in conjunction with MONOGRAM; if the caller passes in SORTING, remove it from
69             // the options list
70             options.remove(PersonNameFormatter.Options.SORTING);
71         } else if (options.contains(PersonNameFormatter.Options.SORTING)) {
72             // we only support SORTING in conjunction with REFERRING; if the caller passes in ADDRESSING, treat it
73             // the same as REFERRING
74             usage = PersonNameFormatter.Usage.REFERRING;
75         }
76 
77         // load the actual formatting patterns-- since we don't know the name order until formatting time (it can be
78         // different for different names), load patterns for both given-first and surname-first names.  (If the user has
79         // specified SORTING, we don't need to do this-- we just load the "sorting" patterns and ignore the name's order.)
80         final String RESOURCE_PATH_PREFIX = "personNames/namePattern/";
81         String resourceNameBody = length.toString().toLowerCase() + "-" + usage.toString().toLowerCase() + "-"
82                 + formality.toString().toLowerCase();
83         if (!options.contains(PersonNameFormatter.Options.SORTING)) {
84             ICUResourceBundle gnFirstResource = rb.getWithFallback(RESOURCE_PATH_PREFIX + "givenFirst-" + resourceNameBody);
85             ICUResourceBundle snFirstResource = rb.getWithFallback(RESOURCE_PATH_PREFIX + "surnameFirst-" + resourceNameBody);
86 
87             gnFirstPatterns = PersonNamePattern.makePatterns(asStringArray(gnFirstResource), this);
88             snFirstPatterns = PersonNamePattern.makePatterns(asStringArray(snFirstResource), this);
89 
90             gnFirstLocales = new HashSet<>();
91             Collections.addAll(gnFirstLocales, asStringArray(rb.getWithFallback("personNames/nameOrderLocales/givenFirst")));
92             snFirstLocales = new HashSet<>();
93             Collections.addAll(snFirstLocales, asStringArray(rb.getWithFallback("personNames/nameOrderLocales/surnameFirst")));
94         } else {
95             ICUResourceBundle patternResource = rb.getWithFallback(RESOURCE_PATH_PREFIX + "sorting-" + resourceNameBody);
96 
97             gnFirstPatterns = PersonNamePattern.makePatterns(asStringArray(patternResource), this);
98             snFirstPatterns = null;
99             gnFirstLocales = null;
100             snFirstLocales = null;
101         }
102     }
103 
104     /**
105      * THIS IS A DUMMY CONSTRUCTOR JUST FOR THE USE OF THE UNIT TESTS TO CHECK SOME OF THE INTERNAL IMPLEMENTATION!
106      */
PersonNameFormatterImpl(Locale locale, String[] patterns)107     public PersonNameFormatterImpl(Locale locale, String[] patterns) {
108         // first, set dummy values for the other fields
109         snFirstPatterns = null;
110         gnFirstLocales = null;
111         snFirstLocales = null;
112         length = PersonNameFormatter.Length.MEDIUM;
113         usage = PersonNameFormatter.Usage.REFERRING;
114         formality = PersonNameFormatter.Formality.FORMAL;
115         options = Collections.emptySet();
116         initialPattern = "{0}.";
117         initialSequencePattern = "{0} {1}";
118         capitalizeSurname = false;
119         foreignSpaceReplacement = " ";
120         formatterLocaleUsesSpaces = true;
121 
122         // then, set values for the fields we actually care about
123         this.locale = locale;
124         gnFirstPatterns = PersonNamePattern.makePatterns(patterns, this);
125 
126     }
127 
formatToString(PersonName name)128     public String formatToString(PersonName name) {
129         // TODO: Should probably return a FormattedPersonName object
130 
131         // if the formatter is for a language that doesn't use spaces between words and the name is from a language
132         // that does, create a formatter for the NAME'S locale and use THAT to format the name
133         Locale nameLocale = getNameLocale(name);
134         boolean nameLocaleUsesSpaces = !LOCALES_THAT_DONT_USE_SPACES.contains(nameLocale.getLanguage());
135         if (!formatterLocaleUsesSpaces && nameLocaleUsesSpaces) {
136             PersonNameFormatterImpl nativeFormatter = new PersonNameFormatterImpl(nameLocale, this.length,
137                     this.usage, this.formality, this.options);
138             String result = nativeFormatter.formatToString(name);
139 
140             // BUT, if the name is actually written in the formatter locale's script, replace any spaces in the name
141             // with the foreignSpaceReplacement character
142             if (!foreignSpaceReplacement.equals(" ") && scriptMatchesLocale(result, this.locale)) {
143                 result = result.replace(" ", this.foreignSpaceReplacement);
144             }
145             return result;
146         }
147 
148         // if we get down to here, we're just doing normal formatting-- if we have both given-first and surname-first
149         // rules, choose which one to use based on the name's locale and preferred field order
150         if (snFirstPatterns == null || nameIsGnFirst(name)) {
151             return getBestPattern(gnFirstPatterns, name).format(name);
152         } else {
153             return getBestPattern(snFirstPatterns, name).format(name);
154         }
155     }
156 
getLocale()157     public Locale getLocale() {
158         return locale;
159     }
160 
getLength()161     public PersonNameFormatter.Length getLength() { return length; }
162 
getUsage()163     public PersonNameFormatter.Usage getUsage() { return usage; }
164 
getFormality()165     public PersonNameFormatter.Formality getFormality() { return formality; }
166 
getOptions()167     public Set<PersonNameFormatter.Options> getOptions() { return options; }
168 
getInitialPattern()169     public String getInitialPattern() {
170         return initialPattern;
171     }
172 
getInitialSequencePattern()173     public String getInitialSequencePattern() {
174         return initialSequencePattern;
175     }
176 
shouldCapitalizeSurname()177     public boolean shouldCapitalizeSurname() {
178         return capitalizeSurname;
179     }
180 
181     private final Set<String> LOCALES_THAT_DONT_USE_SPACES = new HashSet<>(Arrays.asList("ja", "zh", "th", "yue", "km", "lo"));
182 
183     /**
184      * Returns the value of the resource, as a string array.
185      * @param resource An ICUResourceBundle of type STRING or ARRAY.  If ARRAY, this function just returns it
186      *                 as a string array.  If STRING, it returns a one-element array containing that string.
187      * @return The resource's value, as an array of Strings.
188      */
asStringArray(ICUResourceBundle resource)189     private String[] asStringArray(ICUResourceBundle resource) {
190         if (resource.getType() == STRING) {
191             return new String[] { resource.getString() };
192         } else if (resource.getType() == ARRAY){
193             return resource.getStringArray();
194         } else {
195             throw new IllegalStateException("Unsupported resource type " + resource.getType());
196         }
197     }
198 
199     /**
200      * Returns the field order to use when formatting this name, taking into account the name's preferredOrder
201      * field, as well as the name and formatter's respective locales.
202      * @param name The name to be formatted.
203      * @return If true, use given-first order to format the name; if false, use surname-first order.
204      */
nameIsGnFirst(PersonName name)205     private boolean nameIsGnFirst(PersonName name) {
206         // the name can declare its order-- check that first (it overrides any locale-based calculation)
207         Set<PersonName.FieldModifier> modifiers = new HashSet<>();
208         String preferredOrder = name.getFieldValue(PersonName.NameField.PREFERRED_ORDER, modifiers);
209         if (preferredOrder != null) {
210             if (preferredOrder.equals("givenFirst")) {
211                 return true;
212             } else if (preferredOrder.equals("surnameFirst")) {
213                 return false;
214             } else {
215                 throw new IllegalArgumentException("Illegal preferredOrder value " + preferredOrder);
216             }
217         }
218 
219         String localeStr = getNameLocale(name).toString();
220         do {
221             if (gnFirstLocales.contains(localeStr)) {
222                 return true;
223             } else if (snFirstLocales.contains(localeStr)) {
224                 return false;
225             }
226 
227             int lastUnderbarPos = localeStr.lastIndexOf("_");
228             if (lastUnderbarPos >= 0) {
229                 localeStr = localeStr.substring(0, lastUnderbarPos);
230             } else {
231                 localeStr = "root";
232             }
233         } while (!localeStr.equals("root"));
234 
235         // should never get here-- "root" should always be in one of the locales
236         return true;
237     }
238 
getBestPattern(PersonNamePattern[] patterns, PersonName name)239     private PersonNamePattern getBestPattern(PersonNamePattern[] patterns, PersonName name) {
240         // early out if there's only one pattern
241         if (patterns.length == 1) {
242             return patterns[0];
243         } else {
244             // if there's more than one pattern, return the one that contains the greatest number of fields that
245             // actually have values in `name`.  If there's a tie, return the pattern that contains the lowest number
246             // of fields that DON'T have values in `name`.
247             int maxPopulatedFields = 0;
248             int minEmptyFields = Integer.MAX_VALUE;
249             PersonNamePattern bestPattern = null;
250 
251             for (PersonNamePattern pattern : patterns) {
252                 int populatedFields = pattern.numPopulatedFields(name);
253                 int emptyFields = pattern.numEmptyFields(name);
254                 if (populatedFields > maxPopulatedFields) {
255                     maxPopulatedFields = populatedFields;
256                     minEmptyFields = emptyFields;
257                     bestPattern = pattern;
258                 } else if (populatedFields == maxPopulatedFields && emptyFields < minEmptyFields) {
259                     minEmptyFields = emptyFields;
260                     bestPattern = pattern;
261                 }
262             }
263             return bestPattern;
264         }
265     }
266 
267     /**
268      * Internal function to figure out the name's locale when the name doesn't specify it.
269      * (Note that this code assumes that if the locale is specified, it includes a language
270      * code.)
271      * @param name The name for which we need the locale
272      * @return The name's (real or guessed) locale.
273      */
getNameLocale(PersonName name)274     private Locale getNameLocale(PersonName name) {
275         // if the name specifies its locale, we can just return it
276         Locale nameLocale = name.getNameLocale();
277         if (nameLocale == null) {
278             // if not, we look at the characters in the name.  If their script matches the default script for the formatter's
279             // locale, we use the formatter's locale as the name's locale
280             int formatterScript = UScript.getCodeFromName(ULocale.addLikelySubtags(ULocale.forLocale(locale)).getScript());
281             String givenName = name.getFieldValue(PersonName.NameField.GIVEN, new HashSet<PersonName.FieldModifier>());
282             int nameScript = UScript.INVALID_CODE;
283             for (int i = 0; nameScript == UScript.INVALID_CODE && i < givenName.length(); i++) {
284                 // the script of the name is the script of the first character in the name whose script isn't
285                 // COMMON or INHERITED
286                 int script = UScript.getScript(givenName.charAt(i));
287                 if (script != UScript.COMMON && script != UScript.INHERITED) {
288                     nameScript = script;
289                 }
290             }
291             if (formatterScript == nameScript) {
292                 nameLocale = this.locale;
293             } else {
294                 // if the name's script is different from the formatter's script, we use addLikelySubtags() to find the
295                 // default language for the name's script and use THAT as the name's locale
296                 nameLocale = new Locale(ULocale.addLikelySubtags(new ULocale("und_" + UScript.getShortName(nameScript))).getLanguage());
297             }
298             // TODO: This algorithm has a few deficiencies: First, it assumes the script of the string is the script of the first
299             // character in the string that's not COMMON or INHERITED.  This won't work well for some languages, such as Japanese,
300             // that use multiple scripts.  Doing better would require adding a new getScript(String) method on UScript, which
301             // might be something we want.  Second, we only look at the given-name field.  This field should always be populated,
302             // but if it isn't, we're stuck.  Looking at all the fields requires API on PersonName that we don't need anywhere
303             // else.
304         }
305         return nameLocale;
306     }
307 
308     /**
309      * Returns true if the script of `s` is one of the default scripts for `locale`.
310      * This function only checks the script of the first character whose script isn't "common,"
311      * so it probably won't work right on mixed-script strings.
312      */
scriptMatchesLocale(String s, Locale locale)313     private boolean scriptMatchesLocale(String s, Locale locale) {
314         int[] localeScripts = UScript.getCode(locale);
315         int stringScript = UScript.COMMON;
316         for (int i = 0; stringScript == UScript.COMMON && i < s.length(); i++) {
317             char c = s.charAt(i);
318             stringScript = UScript.getScript(c);
319         }
320 
321         for (int localeScript : localeScripts) {
322             if (localeScript == stringScript) {
323                 return true;
324             }
325         }
326         return false;
327     }
328 }
329