package org.unicode.cldr.util;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.EnumSet;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.logging.Logger;
import java.util.Set;
import java.util.TreeSet;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.unicode.cldr.util.PatternPlaceholders.PlaceholderInfo;

import com.ibm.icu.text.MessageFormat;
import com.ibm.icu.util.Output;

/**
 * Produces the descriptive text associated with a CLDR path, using the
 * path-to-description table loaded from {@code data/PathDescription.txt}.
 *
 * <p>An instance keeps per-call state (the status set, the starred form of the last
 * path, a reusable matcher and the last lookup arguments) in instance fields, and each
 * call to {@link #getDescription} or {@link #getRawDescription} overwrites it.
 */
public class PathDescription {

    private static final Logger logger = Logger.getLogger(PathDescription.class.getName());

    /**
     * What {@link #getDescription} does when it records a {@link Status}:
     * with {@code SKIP} it returns {@code null} immediately, with {@code CONTINUE}
     * it records the status and keeps going.
     */
    public enum ErrorHandling {
        SKIP, CONTINUE
    }

    // BE sure to sync with the list in xmbSkip!
    /**
     * Additional language codes that {@code isRootCode} accepts for the "language" type
     * even when they are not among the standard display codes.
     */
    public static final Set<String> EXTRA_LANGUAGES = new TreeSet<>(
        Arrays
            .asList(
                "ach|af|ak|ak|am|ar|az|be|bem|bg|bh|bn|br|bs|ca|chr|ckb|co|crs|cs|cy|da|de|de_AT|de_CH|ee|el|en|en_AU|en_CA|en_GB|en_US|eo|es|es_419|es_ES|et|eu|fa|fi|fil|fo|fr|fr_CA|fr_CH|fy|ga|gaa|gd|gl|gn|gsw|gu|ha|haw|he|hi|hr|ht|hu|hy|ia|id|ig|io|is|it|ja|jv|ka|kg|kk|km|kn|ko|kri|ku|ky|la|lg|ln|lo|loz|lt|lua|lv|mfe|mg|mi|mk|ml|mn|mr|ms|mt|my|nb|ne|nl|nl_BE|nn|no|nso|ny|nyn|oc|om|or|pa|pcm|pl|ps|pt|pt_BR|pt_PT|qu|rm|rn|ro|ro|ro_MD|ru|rw|sd|si|sk|sl|sn|so|sq|sr|sr_Latn|sr_ME|st|su|sv|sw|ta|te|tg|th|ti|tk|tlh|tn|to|tr|tt|tum|ug|uk|und|ur|uz|vi|wo|xh|yi|yo|zh|zh_Hans|zh_Hant|zh_HK|zu|zxx"
                    // String.split takes a regex: an unescaped "|" is an empty alternation
                    // and would split between every character, so the bar must be escaped.
                    .split("\\|")));

    /** Matches a metazone path; group 1 is the metazone type, group 3 the last path element. */
    private static final Pattern METAZONE_PATTERN = Pattern
        .compile("//ldml/dates/timeZoneNames/metazone\\[@type=\"([^\"]*)\"]/(.*)/(.*)");

    /** Matches a quoted attribute value ({@code ="..."}); group 1 is the text between the quotes. */
    private static final Pattern STAR_ATTRIBUTE_PATTERN = PatternCache.get("=\"([^\"]*)\"");

    private static final StandardCodes STANDARD_CODES = StandardCodes.make();
    private static final Map<String, String> ZONE2COUNTRY = STANDARD_CODES.getZoneToCounty();
    private static final RegexLookup<String> pathHandling = new RegexLookup<String>().loadFromFile(
        PathDescription.class,
        "data/PathDescription.txt");

    // set in construction

    private final CLDRFile english;
    private final Map<String, String> extras;
    private final ErrorHandling errorHandling;
    private final Map<String, List<Set<String>>> starredPaths;
    private final Set<String> allMetazones;

    // used on instance

    private final Matcher metazoneMatcher = METAZONE_PATTERN.matcher("");
    private String starredPathOutput;
    private final Output<String[]> pathArguments = new Output<>();
    private final EnumSet<Status> status = EnumSet.noneOf(Status.class);

    /** Description used when the lookup table has no entry for a path. */
    public static final String MISSING_DESCRIPTION = "Before translating, please see http://cldr.org/translation.";

    /**
     * Creates a description provider.
     *
     * @param supplementalDataInfo
     *            source of the set of all metazones; must not be null
     * @param english
     *            file used to look up English values and territory names
     * @param extras
     *            map from a path to another path whose English value stands in when the
     *            caller supplies no value; a null argument is replaced by an empty map
     * @param starredPaths
     *            map that {@link #getDescription} fills with the attribute values seen for
     *            each starred path; a null argument is replaced by a new private map
     * @param errorHandling
     *            whether {@link #getDescription} returns null or continues after recording a status
     */
    public PathDescription(SupplementalDataInfo supplementalDataInfo,
        CLDRFile english,
        Map<String, String> extras,
        Map<String, List<Set<String>>> starredPaths,
        ErrorHandling errorHandling) {
        this.english = english;
        this.extras = extras == null ? new HashMap<>() : extras;
        this.starredPaths = starredPaths == null ? new HashMap<>() : starredPaths;
        allMetazones = supplementalDataInfo.getAllMetazones();
        this.errorHandling = errorHandling;
    }

    /**
     * @return the starred form of the path from the most recent {@link #getDescription} call
     *         that got as far as computing it, or null if none has
     */
    public String getStarredPathOutput() {
        return starredPathOutput;
    }

    /**
     * @return the live status set of this instance; it is cleared and refilled by every
     *         {@link #getDescription} and cleared by every {@link #getRawDescription}
     */
    public EnumSet<Status> getStatus() {
        return status;
    }

    /**
     * Conditions recorded by {@link #getDescription}: {@code SKIP} when the table entry is
     * the literal "SKIP", {@code NULL_VALUE} when no value was supplied and none could be
     * derived, {@code EMPTY_CONTENT} when the value is the empty string, and
     * {@code NOT_REQUIRED} when a "ROOT" entry's code is not a root code.
     */
    public enum Status {
        SKIP, NULL_VALUE, EMPTY_CONTENT, NOT_REQUIRED
    }

    /**
     * Returns the table entry for a path exactly as stored, without any substitution.
     * Clears the status set as a side effect.
     *
     * @param path
     *            the xpath to look up
     * @param value
     *            not used by this method
     * @param context
     *            passed through to the lookup table
     * @return the raw entry, or whatever the lookup returns when nothing matches
     */
    public String getRawDescription(String path, String value, Object context) {
        status.clear();
        return pathHandling.get(path, context, pathArguments);
    }

    /**
     * Returns the description for a path with its placeholders filled in.
     *
     * <p>The value only influences the recorded status (and, with
     * {@link ErrorHandling#SKIP}, whether null is returned); it is not inserted into the
     * description. As side effects the status set is reset, the starred form of the path
     * is remembered, and the shared starred-paths map is updated.
     *
     * @param path
     *            the xpath to describe
     * @param value
     *            the value at that path; when null, a stand-in is looked up through the
     *            extras map or synthesized for metazone paths
     * @param level
     *            not used by this method
     * @param context
     *            passed through to the lookup table
     * @return the formatted description, or null when a status was recorded and the
     *         instance was constructed with {@link ErrorHandling#SKIP}
     */
    public String getDescription(String path, String value, Level level, Object context) {
        status.clear();

        String description = pathHandling.get(path, context, pathArguments);
        // Remember explicitly that the table had no entry, instead of later comparing
        // the description to MISSING_DESCRIPTION by reference.
        final boolean descriptionMissing = description == null;
        if (descriptionMissing) {
            description = MISSING_DESCRIPTION;
        } else if ("SKIP".equals(description)) {
            status.add(Status.SKIP);
            if (errorHandling == ErrorHandling.SKIP) {
                return null;
            }
        }

        // String localeWhereFound = english.getSourceLocaleID(path, status);
        // if (!status.pathWhereFound.equals(path)) {
        // reasonsToPaths.put("alias", path + " " + value);
        // continue;
        // }
        if (value == null) { // a count item?
            String xpath = extras.get(path);
            if (xpath != null) {
                value = english.getStringValue(xpath);
            } else if (path.contains("/metazone")) {
                if (metazoneMatcher.reset(path).matches()) {
                    String name = metazoneMatcher.group(1);
                    String type = metazoneMatcher.group(3);
                    value = name.replace('_', ' ')
                        + (type.equals("generic") ? "" : type.equals("daylight") ? " Summer" : " Winter")
                        + " Time";
                    // System.out.println("Missing: " + path + " : " + value);
                }
            }
            if (value == null) {
                status.add(Status.NULL_VALUE);
                if (errorHandling == ErrorHandling.SKIP) {
                    return null;
                }
            }
        }
        if (value != null && value.length() == 0) {
            status.add(Status.EMPTY_CONTENT);
            if (errorHandling == ErrorHandling.SKIP) {
                return null;
            }
        }
        // if (GenerateXMB.contentMatcher != null && !GenerateXMB.contentMatcher.reset(value).find()) {
        // PathDescription.addSkipReasons(reasonsToPaths, "content-parameter", level, path, value);
        // return null;
        // }

        List<String> attributes = addStarredInfo(starredPaths, path);

        // In special cases, only use if there is a root value (languageNames, ...
        if (description.startsWith("ROOT")) {
            // Entry layout: "ROOT" <type> ";" <message pattern>
            int typeEnd = description.indexOf(';');
            String type = description.substring(4, typeEnd).trim();
            description = description.substring(typeEnd + 1).trim();

            boolean isMetazone = type.equals("metazone");
            String code = attributes.get(0);
            if (!isRootCode(code, allMetazones, type, isMetazone)) {
                status.add(Status.NOT_REQUIRED);
                if (errorHandling == ErrorHandling.SKIP) {
                    return null;
                }
            }
            if (isMetazone) {
                code = describeMetazoneCode(code, path);
            } else if (type.equals("timezone")) {
                code = describeTimezoneCode(code);
            }
            description = MessageFormat.format(MessageFormat.autoQuoteApostrophe(description), new Object[] { code });
        } else if (path.contains("exemplarCity")) {
            String regionCode = ZONE2COUNTRY.get(attributes.get(0));
            String englishRegionName = english.getName(CLDRFile.TERRITORY_NAME, regionCode);
            description = MessageFormat.format(MessageFormat.autoQuoteApostrophe(description),
                new Object[] { englishRegionName });
        } else if (!descriptionMissing) {
            description = MessageFormat.format(MessageFormat.autoQuoteApostrophe(description),
                (Object[]) pathArguments.value);
        }

        return description;
    }

    /**
     * Appends the form wording (abbreviated or not; summer, winter or the raw last path
     * element) to a metazone code, based on the last two elements of the path.
     */
    private static String describeMetazoneCode(String code, String path) {
        XPathParts parts = XPathParts.getFrozenInstance(path);
        String daylightType = parts.getElement(-1);
        if (daylightType.equals("daylight")) {
            daylightType = "summer";
        } else if (daylightType.equals("standard")) {
            daylightType = "winter";
        }
        String length = parts.getElement(-2);
        length = length.equals("long") ? "" : "abbreviated ";
        return code + ", " + length + daylightType + " form";
    }

    /**
     * Turns a timezone code into a phrase naming it as a timezone, a city or a country.
     * When the zone has no country, or the country has no English name, a warning is
     * logged and the code is returned unchanged.
     */
    private String describeTimezoneCode(String code) {
        String country = ZONE2COUNTRY.get(code);
        int lastSlash = code.lastIndexOf('/');
        String codeName = lastSlash < 0 ? code : code.substring(lastSlash + 1).replace('_', ' ');

        if ("001".equals(country)) {
            return "the timezone “" + codeName + "”";
        }
        if (country != null) {
            String countryName = english.getName("territory", country);
            if (countryName != null) {
                if (!codeName.equals(countryName)) {
                    return "the city “" + codeName + "” (in " + countryName + ")";
                }
                return "the country “" + codeName + "”";
            }
        }
        logger.warning("Missing country for timezone " + code);
        return code;
    }

    /**
     * Creates an HTML table of placeholder information. The placeholder keys, names and
     * examples are inserted as they are; this method performs no HTML escaping itself.
     *
     * @param path
     *            the xpath to specify placeholder information for
     * @return a HTML string, or an empty string if there was no placeholder information
     */
    public String getPlaceholderDescription(String path) {
        Map<String, PlaceholderInfo> placeholders = PatternPlaceholders.getInstance().get(path);
        if (placeholders == null || placeholders.isEmpty()) {
            return "";
        }
        // Method-local buffer: the unsynchronized StringBuilder is sufficient.
        StringBuilder buffer = new StringBuilder();
        buffer.append("<table>");
        buffer.append("<tr><th>Placeholder</th><th>Meaning</th><th>Example</th></tr>");
        for (Entry<String, PlaceholderInfo> entry : placeholders.entrySet()) {
            PlaceholderInfo info = entry.getValue();
            buffer.append("<tr>");
            buffer.append("<td>").append(entry.getKey()).append("</td>");
            buffer.append("<td>").append(info.name).append("</td>");
            buffer.append("<td>").append(info.example).append("</td>");
            buffer.append("</tr>");
        }
        buffer.append("</table>");
        return buffer.toString();
    }

    /**
     * Decides whether a code counts as a root code for the given type.
     *
     * @param code
     *            the code to test
     * @param allMetazones
     *            the set consulted when {@code isMetazone} is true
     * @param type
     *            the code type; "timezone" selects the canonical time zones, anything else
     *            the display codes for that type
     * @param isMetazone
     *            whether the code is a metazone
     * @return true if the code is in the selected set, contains an underscore, or is a
     *         "language" code listed in {@link #EXTRA_LANGUAGES}
     */
    private static boolean isRootCode(String code, Set<String> allMetazones, String type, boolean isMetazone) {
        Set<String> codes = isMetazone ? allMetazones
            : type.equals("timezone") ? STANDARD_CODES.getCanonicalTimeZones()
                : STANDARD_CODES.getSurveyToolDisplayCodes(type);
        // end
        boolean isRootCode = codes.contains(code) || code.contains("_");
        if (!isRootCode && type.equals("language")
            && EXTRA_LANGUAGES.contains(code)) {
            isRootCode = true;
        }
        return isRootCode;
    }

    /**
     * Replaces every quoted attribute value in the path with {@code .*}, stores the result
     * for {@link #getStarredPathOutput()}, and records each replaced value in the given
     * map under the starred path, one set per attribute position.
     *
     * @param starredPaths
     *            map from starred path to the per-position sets of values; updated in place
     * @param path
     *            the xpath to process
     * @return the attribute values of the path, in order of appearance
     */
    private List<String> addStarredInfo(Map<String, List<Set<String>>> starredPaths, String path) {
        Matcher starAttributeMatcher = STAR_ATTRIBUTE_PATTERN.matcher(path);
        StringBuilder starredPath = new StringBuilder();
        List<String> attributes = new ArrayList<>();
        int lastEnd = 0;
        while (starAttributeMatcher.find()) {
            int start = starAttributeMatcher.start(1);
            int end = starAttributeMatcher.end(1);
            // Copy the text up to the value, then stand in ".*" for the value itself.
            starredPath.append(path, lastEnd, start);
            starredPath.append(".*");

            attributes.add(path.substring(start, end));
            lastEnd = end;
        }
        starredPath.append(path, lastEnd, path.length());
        String starredPathString = starredPath.toString().intern();
        starredPathOutput = starredPathString;

        List<Set<String>> attributeList = starredPaths.get(starredPathString);
        if (attributeList == null) {
            attributeList = new ArrayList<>();
            starredPaths.put(starredPathString, attributeList);
        }
        for (int i = 0; i < attributes.size(); ++i) {
            String attribute = attributes.get(i);
            if (attributeList.size() <= i) {
                // First value seen at this position: start a new sorted set for it.
                Set<String> subset = new TreeSet<>();
                subset.add(attribute);
                attributeList.add(subset);
            } else {
                attributeList.get(i).add(attribute);
            }
        }
        return attributes;
    }
}