package org.unicode.cldr.tool;

import com.google.common.base.Joiner;
import com.google.common.collect.ImmutableSet;
import com.google.common.collect.Multimap;
import com.google.common.collect.Multimaps;
import com.google.common.collect.TreeMultimap;
import com.ibm.icu.text.Collator;
import com.ibm.icu.util.ULocale;
import java.io.File;
import java.io.StringWriter;
import java.util.Collection;
import java.util.Comparator;
import java.util.Locale;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.unicode.cldr.util.CLDRConfig;
import org.unicode.cldr.util.CLDRFile;
import org.unicode.cldr.util.CLDRFile.DraftStatus;
import org.unicode.cldr.util.CLDRPaths;
import org.unicode.cldr.util.CldrUtility;
import org.unicode.cldr.util.Factory;
import org.unicode.cldr.util.TempPrintWriter;
import org.unicode.cldr.util.personname.PersonNameFormatter;
import org.unicode.cldr.util.personname.PersonNameFormatter.Field;
import org.unicode.cldr.util.personname.PersonNameFormatter.Formality;
import org.unicode.cldr.util.personname.PersonNameFormatter.FormatParameters;
import org.unicode.cldr.util.personname.PersonNameFormatter.Length;
import org.unicode.cldr.util.personname.PersonNameFormatter.ModifiedField;
import org.unicode.cldr.util.personname.PersonNameFormatter.Modifier;
import org.unicode.cldr.util.personname.PersonNameFormatter.Order;
import org.unicode.cldr.util.personname.PersonNameFormatter.SampleType;
import org.unicode.cldr.util.personname.PersonNameFormatter.Usage;
import org.unicode.cldr.util.personname.SimpleNameObject;

/**
 * Command-line tool that writes one person-name formatting test data file ({@code <locale>.txt})
 * per available language into the {@code personNameTest} directory under {@link
 * CLDRPaths#TEST_DATA}.
 *
 * <p>A locale's file is removed (if it exists) instead of being written when the locale lacks the
 * required person-name paths, when its sample names cannot be loaded, or when any other failure
 * occurs while generating its data.
 */
public class GeneratePersonNameTestData {
    private static final Joiner COMMA_JOINER = Joiner.on(", ");
    private static final CLDRConfig CLDR_CONFIG = CLDRConfig.getInstance();
    // NOTE(review): not referenced in this class; kept so class initialization is unchanged.
    private static final CLDRFile ENGLISH = CLDR_CONFIG.getEnglish();

    /** The "und" locale, which is never picked as the locale for non-native sample names. */
    private static final ULocale UND_LOCALE = new ULocale("und");

    /**
     * Orders strings longest first; strings of equal length are ordered by the root collator, with
     * natural (code unit) order as the final tie-breaker.
     */
    static final Comparator<String> LENGTH_FIRST =
            Comparator.comparingInt(String::length)
                    .reversed()
                    .thenComparing(Collator.getInstance(Locale.ROOT))
                    .thenComparing(Comparator.naturalOrder());

    enum Options {
        none,
        sorting
    }

    /** Directory that receives the generated test files. */
    static File dir = new File(CLDRPaths.TEST_DATA, "personNameTest");

    /**
     * Paths that must have a value in the unresolved file of a locale (other than "en") for test
     * data to be generated for it.
     */
    static final Set<String> REQUIRED_PATHS =
            ImmutableSet.of(
                    "//ldml/personNames/nameOrderLocales[@order=\"givenFirst\"]",
                    "//ldml/personNames/nameOrderLocales[@order=\"surnameFirst\"]",
                    "//ldml/personNames/parameterDefault[@parameter=\"formality\"]",
                    "//ldml/personNames/parameterDefault[@parameter=\"length\"]",
                    "//ldml/personNames/foreignSpaceReplacement",
                    "//ldml/personNames/nativeSpaceReplacement",
                    "//ldml/personNames/initialPattern[@type=\"initial\"]",
                    "//ldml/personNames/initialPattern[@type=\"initialSequence\"]",
                    "//ldml/personNames/sampleName[@item=\"nativeGGS\"]/nameField[@type=\"given\"]",
                    "//ldml/personNames/sampleName[@item=\"nativeGGS\"]/nameField[@type=\"given2\"]",
                    "//ldml/personNames/sampleName[@item=\"nativeGGS\"]/nameField[@type=\"surname\"]",
                    "//ldml/personNames/personName[@order=\"givenFirst\"][@length=\"long\"][@usage=\"referring\"][@formality=\"formal\"]/namePattern"
                    //
                    );

    /**
     * Generates the test data files.
     *
     * @param args optional; {@code args[0]} is a regular expression, and only locales for which
     *     the expression matches at the start of the locale ID ({@link Matcher#lookingAt()}) are
     *     processed. With no arguments, every available language is processed.
     */
    public static void main(String[] args) {
        Factory factory = CLDR_CONFIG.getCldrFactory();

        Matcher localeMatcher = null;
        if (args.length >= 1) {
            localeMatcher = Pattern.compile(args[0]).matcher("");
        }

        for (String locale : factory.getAvailableLanguages()) {
            if (localeMatcher != null && !localeMatcher.reset(locale).lookingAt()) {
                continue;
            }
            try {
                generateForLocale(factory, locale);
            } catch (Exception e) {
                // Tool boundary: a failure in one locale must not stop the others.
                System.out.println("Skipping " + locale);
                e.printStackTrace();
                removeTestFile(locale);
            }
        }
    }

    /**
     * Writes the test file for a single locale, or removes an existing file when the locale does
     * not have usable person-name data.
     *
     * @param factory source of the locale's CLDR file
     * @param locale locale ID to process
     * @throws IllegalArgumentException if the formatter reports no name order for the locale
     * @throws Exception any other failure; the caller treats every failure as "skip this locale"
     */
    private static void generateForLocale(Factory factory, String locale) throws Exception {
        // Resolved file; don't include draft=unconfirmed/provisional
        CLDRFile cldrFile = factory.make(locale, true, DraftStatus.contributed);
        CLDRFile unresolved = cldrFile.getUnresolved();

        // Check that we have sufficient person data (English is always processed)
        if (!locale.equals("en") && !hasRequiredPaths(unresolved)) {
            removeTestFile(locale);
            return; // skip unless we have person data
        }

        // Load the samples, and skip the locale if there is a problem
        Map<SampleType, SimpleNameObject> names;
        PersonNameFormatter formatter;
        try {
            names = PersonNameFormatter.loadSampleNames(cldrFile);
            formatter = new PersonNameFormatter(cldrFile);
        } catch (Exception e) {
            // Deliberately best-effort: report why, then skip instead of failing.
            System.out.println("Skipping " + locale + ": " + e);
            removeTestFile(locale);
            return;
        }
        if (names.isEmpty()) {
            removeTestFile(locale);
            return;
        }

        // We have to jump through some hoops to get locales corresponding to the order.
        // First get the locale for native sample names.
        ULocale myLocale = new ULocale(locale);
        Order myOrder = formatter.getOrderFromLocale(myLocale);
        if (myOrder == null) {
            throw new IllegalArgumentException("Missing order for: " + locale);
        }

        // Now get the locale for non-native sample names: one with the opposite order.
        Order otherOrder = myOrder == Order.givenFirst ? Order.surnameFirst : Order.givenFirst;
        ULocale otherLocale = chooseOtherLocale(formatter, myLocale, otherOrder);

        // now change region to AQ, just to check for inheritance
        myLocale = addRegionIfMissing(myLocale, "AQ");
        otherLocale = addRegionIfMissing(otherLocale, "AQ");

        // Start collecting output
        StringWriter output = new StringWriter();
        output.write("\n");
        writeChoices("field", Field.ALL, output);
        writeChoices("modifiers", Modifier.ALL, output);
        writeChoices("order", Order.ALL, output);
        writeChoices("length", Length.ALL, output);
        writeChoices("usage", Usage.ALL, output);
        writeChoices("formality", Formality.ALL, output);

        for (Entry<SampleType, SimpleNameObject> sample : names.entrySet()) {
            SampleType sampleType = sample.getKey();
            // ICU's formatter doesn't give us low-level access to the order,
            // so the name's locale is used to set the direction.
            ULocale nameLocale = sampleType.isNative() ? myLocale : otherLocale;
            writeSample(output, formatter, sampleType, sample.getValue(), nameLocale);
        }

        try (TempPrintWriter fileWriter =
                TempPrintWriter.openUTF8Writer(dir.toString(), locale + ".txt")) {
            fileWriter.write(fileHeader(locale));
            fileWriter.write(output.toString());
        }
    }

    /** Returns true if every path in {@link #REQUIRED_PATHS} has a value in the given file. */
    private static boolean hasRequiredPaths(CLDRFile unresolved) {
        for (String path : REQUIRED_PATHS) {
            if (unresolved.getStringValue(path) == null) {
                return false;
            }
        }
        return true;
    }

    /**
     * Picks the locale used for non-native sample names: the first locale (in sorted order, other
     * than "und") that the formatter's data maps to {@code otherOrder}; otherwise French, or
     * German when {@code myLocale} is French.
     */
    private static ULocale chooseOtherLocale(
            PersonNameFormatter formatter, ULocale myLocale, Order otherOrder) {
        Map<ULocale, Order> localeToOrder = formatter.getNamePatternData().getLocaleToOrder();
        Multimap<Order, ULocale> orderToLocale =
                Multimaps.invertFrom(Multimaps.forMap(localeToOrder), TreeMultimap.create());
        for (ULocale tryLocale : orderToLocale.get(otherOrder)) {
            if (!UND_LOCALE.equals(tryLocale)) {
                return tryLocale;
            }
        }
        return myLocale.equals(ULocale.FRENCH) ? ULocale.GERMAN : ULocale.FRENCH;
    }

    /**
     * Appends the test lines for one sample name: its fields, its locale, and then each distinct
     * formatted result followed by all the parameter combinations that produce it.
     */
    private static void writeSample(
            StringWriter output,
            PersonNameFormatter formatter,
            SampleType sampleType,
            SimpleNameObject nameObject,
            ULocale nameLocale) {
        // write the name information
        output.write("\n");
        output.write("# " + sampleType + "\n");
        for (Entry<ModifiedField, String> fieldValue :
                nameObject.getModifiedFieldToValue().entrySet()) {
            output.write("name ; " + fieldValue.getKey() + "; " + fieldValue.getValue() + "\n");
        }
        output.write("name ; " + "locale" + "; " + nameLocale + "\n");

        // Group the formatted names, longest first
        Multimap<String, FormatParameters> valueToSources =
                TreeMultimap.create(LENGTH_FIRST, Comparator.naturalOrder());
        for (FormatParameters parameters : FormatParameters.allCldr()) {
            String formatted = formatter.formatWithoutSuperscripts(nameObject, parameters);
            if (formatted.isEmpty()) {
                continue; // empty results are not recorded
            }
            valueToSources.put(formatted, parameters);
        }

        // write out each result, and then all the parameters that produce it.
        for (Entry<String, Collection<FormatParameters>> group :
                valueToSources.asMap().entrySet()) {
            output.write("\nexpectedResult; " + group.getKey() + "\n\n");
            for (FormatParameters parameters : group.getValue()) {
                output.write(
                        "parameters; "
                                + parameters.getOrder()
                                + "; "
                                + parameters.getLength()
                                + "; "
                                + parameters.getUsage()
                                + "; "
                                + parameters.getFormality()
                                + "\n");
            }
        }
        output.write("\nendName\n");
    }

    /** Builds the explanatory comment header written at the top of a locale's test file. */
    private static String fileHeader(String locale) {
        return "# Test data for Person Name Data\n"
                + CldrUtility.getCopyrightString("# ")
                + "\n# CLDR person name formatting test data for: "
                + locale
                + "\n#"
                + "\n# Test lines have the following structure:"
                + "\n#"
                + "\n# enum ; <type> ; <value>(', ' <value)"
                + "\n# For all the elements in <…> below, the possible choices that could appear in the file."
                + "\n# For example, <field> could be any of title, given, … credentials."
                + "\n# Verify that all of these values work with the implementation."
                + "\n#"
                + "\n# name ; <field>('-'<modifier>) ; <value>"
                + "\n# A sequence of these is to be used to build a person name object with the given field values."
                + "\n# If the <field> is 'locale', then the value is the locale of the name."
                + "\n# That will always be the last field in the name."
                + "\n# NOTE: the locale for the name (where different than the test file's locale) will generally not match the text."
                + "\n# It is chosen to exercise the person name formatting, by having a different given-surname order than the file's locale."
                + "\n#"
                + "\n# expectedResult; <value>"
                + "\n# This line follows a sequence of name lines, and indicates the that all the following parameter lines have this expected value."
                + "\n#"
                + "\n# parameters; <options>; <length>; <usage>; <formality>"
                + "\n# Each of these parameter lines should be tested to see that when formatting the current name with these parameters, "
                + "\n# the expected value is produced."
                + "\n#"
                + "\n# endName"
                + "\n# Indicates the end of the values to be tested with the current name."
                + "\n#"
                + "\n# ====="
                + "\n# Example:"
                + "\n# enum ; field ; title, given, given2, surname, surname2, generation, credentials"
                + "\n# …"
                + "\n#"
                + "\n# name ; given; Iris"
                + "\n# name ; surname; Falke"
                + "\n# name ; locale; de"
                + "\n#"
                + "\n# expectedResult; Falke, Iris"
                + "\n#"
                + "\n# parameters; sorting; long; referring; formal"
                + "\n# parameters; sorting; medium; referring; informal"
                + "\n#"
                + "\n# endName"
                + "\n#"
                + "\n# name ; given; Max"
                + "\n# name ; given2; Ben"
                + "\n# name ; surname; Mustermann"
                + "\n# …"
                + "\n# ====="
                + "\n";
    }

    /**
     * Deletes the test file for the given locale from {@link #dir}, if it exists, so that stale
     * data is not left behind for a locale that is being skipped.
     */
    private static void removeTestFile(String locale) {
        File file = new File(dir.toString(), locale + ".txt");
        if (file.exists()) {
            System.out.println("Removing " + file);
            if (!file.delete()) {
                System.err.println("Failed to remove " + file);
            }
        }
    }

    /**
     * Returns {@code myLocale} unchanged if it already has a region; otherwise returns a copy with
     * the given region set.
     *
     * @param myLocale locale to check
     * @param region region code to set when the locale has none
     * @return a locale that has a region
     */
    public static ULocale addRegionIfMissing(ULocale myLocale, String region) {
        if (!myLocale.getCountry().isEmpty()) {
            return myLocale;
        }
        return new ULocale.Builder().setLocale(myLocale).setRegion(region).build();
    }

    /**
     * Writes one {@code enum ; <kind> ; <choice>, <choice>, ...} line to the output.
     *
     * @param kind label for the set of choices
     * @param choices values to list, joined with ", "
     * @param output destination writer
     */
    public static <T> void writeChoices(String kind, Collection<T> choices, StringWriter output) {
        output.write("enum ; " + kind + " ; " + COMMA_JOINER.join(choices) + "\n");
    }
}