• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 package org.unicode.cldr.tool;
2 
3 import com.google.common.base.Joiner;
4 import com.google.common.collect.ImmutableSet;
5 import com.google.common.collect.Multimap;
6 import com.google.common.collect.Multimaps;
7 import com.google.common.collect.TreeMultimap;
8 import com.ibm.icu.text.Collator;
9 import com.ibm.icu.util.ULocale;
10 import java.io.File;
11 import java.io.StringWriter;
12 import java.util.Collection;
13 import java.util.Comparator;
14 import java.util.Locale;
15 import java.util.Map;
16 import java.util.Map.Entry;
17 import java.util.Set;
18 import java.util.regex.Matcher;
19 import java.util.regex.Pattern;
20 import org.unicode.cldr.util.CLDRConfig;
21 import org.unicode.cldr.util.CLDRFile;
22 import org.unicode.cldr.util.CLDRFile.DraftStatus;
23 import org.unicode.cldr.util.CLDRPaths;
24 import org.unicode.cldr.util.CldrUtility;
25 import org.unicode.cldr.util.Factory;
26 import org.unicode.cldr.util.TempPrintWriter;
27 import org.unicode.cldr.util.personname.PersonNameFormatter;
28 import org.unicode.cldr.util.personname.PersonNameFormatter.Field;
29 import org.unicode.cldr.util.personname.PersonNameFormatter.Formality;
30 import org.unicode.cldr.util.personname.PersonNameFormatter.FormatParameters;
31 import org.unicode.cldr.util.personname.PersonNameFormatter.Length;
32 import org.unicode.cldr.util.personname.PersonNameFormatter.ModifiedField;
33 import org.unicode.cldr.util.personname.PersonNameFormatter.Modifier;
34 import org.unicode.cldr.util.personname.PersonNameFormatter.Order;
35 import org.unicode.cldr.util.personname.PersonNameFormatter.SampleType;
36 import org.unicode.cldr.util.personname.PersonNameFormatter.Usage;
37 import org.unicode.cldr.util.personname.SimpleNameObject;
38 
39 public class GeneratePersonNameTestData {
40     private static final Joiner COMMA_JOINER = Joiner.on(", ");
41     private static final CLDRConfig CLDR_CONFIG = CLDRConfig.getInstance();
42     private static final CLDRFile ENGLISH = CLDR_CONFIG.getEnglish();
43 
44     static final Comparator<String> LENGTH_FIRST =
45             Comparator.comparingInt(String::length)
46                     .reversed()
47                     .thenComparing(Collator.getInstance(Locale.ROOT))
48                     .thenComparing(Comparator.naturalOrder());
49 
50     enum Options {
51         none,
52         sorting
53     }
54 
55     static File dir = new File(CLDRPaths.TEST_DATA, "personNameTest");
56 
57     static final Set<String> REQUIRED_PATHS =
58             ImmutableSet.of(
59                     "//ldml/personNames/nameOrderLocales[@order=\"givenFirst\"]",
60                     "//ldml/personNames/nameOrderLocales[@order=\"surnameFirst\"]",
61                     "//ldml/personNames/parameterDefault[@parameter=\"formality\"]",
62                     "//ldml/personNames/parameterDefault[@parameter=\"length\"]",
63                     "//ldml/personNames/foreignSpaceReplacement",
64                     "//ldml/personNames/nativeSpaceReplacement",
65                     "//ldml/personNames/initialPattern[@type=\"initial\"]",
66                     "//ldml/personNames/initialPattern[@type=\"initialSequence\"]",
67                     "//ldml/personNames/sampleName[@item=\"nativeGGS\"]/nameField[@type=\"given\"]",
68                     "//ldml/personNames/sampleName[@item=\"nativeGGS\"]/nameField[@type=\"given2\"]",
69                     "//ldml/personNames/sampleName[@item=\"nativeGGS\"]/nameField[@type=\"surname\"]",
70                     "//ldml/personNames/personName[@order=\"givenFirst\"][@length=\"long\"][@usage=\"referring\"][@formality=\"formal\"]/namePattern"
71                     //
72                     );
73 
main(String[] args)74     public static void main(String[] args) {
75         Factory factory = CLDR_CONFIG.getCldrFactory();
76 
77         Matcher localeMatcher = null;
78         if (args.length >= 1) {
79             localeMatcher = Pattern.compile(args[0]).matcher("");
80         }
81 
82         ULocale undLocale = new ULocale("und");
83 
84         main:
85         for (String locale : factory.getAvailableLanguages()) {
86             if (localeMatcher != null && !localeMatcher.reset(locale).lookingAt()) {
87                 continue;
88             }
89 
90             try {
91                 CLDRFile cldrFile =
92                         factory.make(locale, true, DraftStatus.contributed); // don't include
93                 // draft=unconfirmed/provisional
94                 CLDRFile unresolved = cldrFile.getUnresolved();
95 
96                 // Check that we have sufficient person data
97 
98                 if (!locale.equals("en")) {
99                     for (String path : REQUIRED_PATHS) {
100                         String value = unresolved.getStringValue(path);
101                         if (value == null) {
102                             removeTestFile(locale);
103                             continue main; // skip unless we have person data
104                         }
105                     }
106                 }
107 
108                 // Load the samples, and exit if there is a problem
109 
110                 Map<SampleType, SimpleNameObject> names;
111                 PersonNameFormatter formatter;
112                 try {
113                     names = PersonNameFormatter.loadSampleNames(cldrFile);
114                     formatter = new PersonNameFormatter(cldrFile);
115                 } catch (Exception e) {
116                     removeTestFile(locale);
117                     continue;
118                 }
119                 if (names.isEmpty()) {
120                     removeTestFile(locale);
121                     continue;
122                 }
123 
124                 // We have to jump through some hoops to get locales corresponding to the order
125                 // First get the locale for native sample names
126 
127                 ULocale myLocale = new ULocale(locale);
128 
129                 Order myOrder = formatter.getOrderFromLocale(myLocale);
130                 if (myOrder == null) {
131                     formatter.getOrderFromLocale(myLocale);
132                     throw new IllegalArgumentException("Missing order for: " + locale);
133                 }
134 
135                 // Now get the locale for non-native sample names
136                 // We see if we can get a locale of the other direction
137                 // Otherwise we pick either English or German
138 
139                 Order otherOrder =
140                         myOrder == Order.givenFirst ? Order.surnameFirst : Order.givenFirst;
141                 Map<ULocale, Order> localeToOrder =
142                         formatter.getNamePatternData().getLocaleToOrder();
143                 Multimap<Order, ULocale> orderToLocale =
144                         Multimaps.invertFrom(
145                                 Multimaps.forMap(localeToOrder), TreeMultimap.create());
146                 ULocale otherLocale = null;
147                 for (ULocale tryLocale : orderToLocale.get(otherOrder)) {
148                     if (!undLocale.equals(tryLocale)) {
149                         otherLocale = tryLocale;
150                         break;
151                     }
152                 }
153                 if (otherLocale == null) {
154                     otherLocale = myLocale.equals(ULocale.FRENCH) ? ULocale.GERMAN : ULocale.FRENCH;
155                 }
156 
157                 // now change region to AQ, just to check for inheritance
158                 myLocale = addRegionIfMissing(myLocale, "AQ");
159                 otherLocale = addRegionIfMissing(otherLocale, "AQ");
160 
161                 // Start collecting output
162 
163                 StringWriter output = new StringWriter();
164                 output.write("\n");
165                 writeChoices("field", Field.ALL, output);
166                 writeChoices("modifiers", Modifier.ALL, output);
167                 writeChoices("order", Order.ALL, output);
168                 writeChoices("length", Length.ALL, output);
169                 writeChoices("usage", Usage.ALL, output);
170                 writeChoices("formality", Formality.ALL, output);
171 
172                 for (Entry<SampleType, SimpleNameObject> entry : names.entrySet()) {
173                     // write the name information
174                     SampleType sampleType = entry.getKey();
175                     if (!sampleType.isNative() && otherLocale == null) {
176                         continue;
177                     }
178                     final SimpleNameObject nameObject = entry.getValue();
179 
180                     output.write("\n");
181                     output.write("# " + sampleType + "\n");
182                     for (Entry<ModifiedField, String> x :
183                             nameObject.getModifiedFieldToValue().entrySet()) {
184                         output.write("name ; " + x.getKey() + "; " + x.getValue() + "\n");
185                     }
186 
187                     // handle the situation that ICU's formatter doesn't give us low-level access
188                     // so we have to use the name locale to set the direction
189 
190                     Order nameOrder;
191                     if (sampleType.isNative()) {
192                         output.write("name ; " + "locale" + "; " + myLocale + "\n");
193                         nameOrder = myOrder;
194                     } else {
195                         output.write("name ; " + "locale" + "; " + otherLocale + "\n");
196                         nameOrder = otherOrder;
197                     }
198 
199                     // Group the formatted names, longest first
200 
201                     Multimap<String, FormatParameters> valueToSources =
202                             TreeMultimap.create(LENGTH_FIRST, Comparator.naturalOrder());
203                     for (FormatParameters parameters : FormatParameters.allCldr()) {
204 
205                         //                        boolean debugPoint = locale.startsWith("th") &&
206                         // parameters.equals(testParameters)
207                         //                            && sampleType == SampleType.nativeGS;
208                         //                        if (debugPoint) {
209                         //                            System.out.println(sampleType + "; " +
210                         // nameObject + "; " + testParameters);
211                         //                            int debug = 0;
212                         //                        }
213 
214                         String formatted =
215                                 formatter.formatWithoutSuperscripts(nameObject, parameters);
216 
217                         if (formatted.isEmpty()) {
218                             continue;
219                         }
220                         valueToSources.put(formatted, parameters);
221                     }
222                     // write out the result, and then all the parameters that give produce it.
223                     for (Entry<String, Collection<FormatParameters>> entry2 :
224                             valueToSources.asMap().entrySet()) {
225                         final String expectedResult = entry2.getKey();
226                         output.write("\nexpectedResult; " + expectedResult + "\n\n");
227                         entry2.getValue()
228                                 .forEach(
229                                         x -> {
230                                             output.write(
231                                                     "parameters; "
232                                                             + x.getOrder()
233                                                             + "; "
234                                                             + x.getLength()
235                                                             + "; "
236                                                             + x.getUsage()
237                                                             + "; "
238                                                             + x.getFormality()
239                                                             + "\n");
240                                         });
241                     }
242                     output.write("\nendName\n");
243                 }
244 
245                 try (TempPrintWriter output2 =
246                         TempPrintWriter.openUTF8Writer(dir.toString(), locale + ".txt"); ) {
247                     output2.write(
248                             "# Test data for Person Name Data\n"
249                                     + CldrUtility.getCopyrightString("#  ")
250                                     + "\n# CLDR person name formatting test data for: "
251                                     + locale
252                                     + "\n#"
253                                     + "\n# Test lines have the following structure:"
254                                     + "\n#"
255                                     + "\n# enum ; <type> ; <value>(', ' <value)"
256                                     + "\n#   For all the elements in <…> below, the possible choices that could appear in the file."
257                                     + "\n#   For example, <field> could be any of title, given, … credentials."
258                                     + "\n#   Verify that all of these values work with the implementation."
259                                     + "\n#"
260                                     + "\n# name ; <field>('-'<modifier>) ; <value>"
261                                     + "\n#   A sequence of these is to be used to build a person name object with the given field values."
262                                     + "\n#   If the <field> is 'locale', then the value is the locale of the name."
263                                     + "\n#     That will always be the last field in the name."
264                                     + "\n#     NOTE: the locale for the name (where different than the test file's locale) will generally not match the text."
265                                     + "\n#     It is chosen to exercise the person name formatting, by having a different given-surname order than the file's locale."
266                                     + "\n#"
267                                     + "\n# expectedResult; <value>"
268                                     + "\n#   This line follows a sequence of name lines, and indicates the that all the following parameter lines have this expected value."
269                                     + "\n#"
270                                     + "\n# parameters; <options>; <length>; <usage>; <formality>"
271                                     + "\n#   Each of these parameter lines should be tested to see that when formatting the current name with these parameters, "
272                                     + "\n#   the expected value is produced."
273                                     + "\n#"
274                                     + "\n# endName"
275                                     + "\n#   Indicates the end of the values to be tested with the current name."
276                                     + "\n#"
277                                     + "\n# ====="
278                                     + "\n# Example:"
279                                     + "\n#     enum ; field ; title, given, given2, surname, surname2, generation, credentials"
280                                     + "\n#     …"
281                                     + "\n#"
282                                     + "\n#     name ; given; Iris"
283                                     + "\n#     name ; surname; Falke"
284                                     + "\n#     name ; locale; de"
285                                     + "\n#"
286                                     + "\n#     expectedResult; Falke, Iris"
287                                     + "\n#"
288                                     + "\n#     parameters; sorting; long; referring; formal"
289                                     + "\n#     parameters; sorting; medium; referring; informal"
290                                     + "\n#"
291                                     + "\n#     endName"
292                                     + "\n#"
293                                     + "\n#     name ; given; Max"
294                                     + "\n#     name ; given2; Ben"
295                                     + "\n#     name ; surname; Mustermann"
296                                     + "\n#     …"
297                                     + "\n# ====="
298                                     + "\n");
299                     output2.write(output.toString());
300                 }
301             } catch (Exception e) {
302                 System.out.println("Skipping " + locale);
303                 e.printStackTrace();
304                 removeTestFile(locale);
305                 continue;
306             }
307         }
308     }
309 
removeTestFile(String locale)310     private static void removeTestFile(String locale) {
311         File file = new File(dir.toString(), locale + ".txt");
312         if (file.exists()) {
313             System.out.println("Removing " + file);
314             file.delete();
315         }
316     }
317 
addRegionIfMissing(ULocale myLocale, String region)318     public static ULocale addRegionIfMissing(ULocale myLocale, String region) {
319         return !myLocale.getCountry().isEmpty()
320                 ? myLocale
321                 : new ULocale.Builder().setLocale(myLocale).setRegion(region).build();
322     }
323 
writeChoices(String kind, Collection<T> choices, StringWriter output)324     public static <T> void writeChoices(String kind, Collection<T> choices, StringWriter output) {
325         output.write("enum ; " + kind + " ; " + COMMA_JOINER.join(choices) + "\n");
326     }
327 }
328