1 package org.unicode.cldr.test; 2 3 import com.ibm.icu.text.MessageFormat; 4 import java.io.BufferedReader; 5 import java.io.IOException; 6 import java.util.ArrayList; 7 import java.util.List; 8 import java.util.regex.Matcher; 9 import java.util.regex.Pattern; 10 import org.unicode.cldr.util.CldrUtility; 11 import org.unicode.cldr.util.PatternCache; 12 import org.unicode.cldr.util.SimpleHtmlParser; 13 import org.unicode.cldr.util.SimpleHtmlParser.Type; 14 import org.unicode.cldr.util.TransliteratorUtilities; 15 16 /** Private class to get the messages from a help file. */ 17 public class HelpMessages { 18 private static final Matcher CLEANUP_BOOKMARK = PatternCache.get("[^a-zA-Z0-9]").matcher(""); 19 20 private static final MessageFormat DEFAULT_HEADER_PATTERN = 21 new MessageFormat("<p>{0}</p>" + CldrUtility.LINE_SEPARATOR); 22 23 private static final Matcher HEADER_HTML = 24 PatternCache.get("<h[0-9]>(.*)</h[0-9]>").matcher(""); 25 26 List<Matcher> keys = new ArrayList<>(); 27 28 List<String> values = new ArrayList<>(); 29 30 enum Status { 31 BASE, 32 BEFORE_CELL, 33 IN_CELL, 34 IN_INSIDE_TABLE 35 } 36 37 StringBuilder[] currentColumn = new StringBuilder[2]; 38 39 int column = 0; 40 41 private static HelpMessages helpMessages; 42 43 /** 44 * Create a HelpMessages object from a filename. The file has to be in the format of a table of 45 * <keyRegex,htmlText> pairs, where the key is a keyRegex expression and htmlText is arbitrary 46 * HTML text. For example: 47 * 48 * <p>{@link 49 * http://unicode.org/cldr/data/tools/cldr-code/org/unicode/cldr/util/data/chart_messages.html} 50 * is used for chart messages, where the key is the name of the chart. 51 * 52 * <p>{@link 53 * http://unicode.org/cldr/data/tools/cldr-code/org/unicode/cldr/util/data/test_help_messages.html} 54 * is used for help messages in the survey tool, where the key is an xpath. 55 * 56 * @param filename 57 */ HelpMessages(String filename)58 public HelpMessages(String filename) { 59 currentColumn[0] = new StringBuilder(); 60 currentColumn[1] = new StringBuilder(); 61 BufferedReader in; 62 try { 63 in = CldrUtility.getUTF8Data(filename); 64 int tableCount = 0; 65 66 boolean inContent = false; 67 // if the table level is 1 (we are in the main table), then we look for 68 // <td>...</td><td>...</td>. That 69 // means that we have column 1 and column 2. 70 71 SimpleHtmlParser simple = new SimpleHtmlParser().setReader(in); 72 StringBuilder result = new StringBuilder(); 73 boolean hadPop = false; 74 main: 75 while (true) { 76 Type x = simple.next(result); 77 switch (x) { 78 case ELEMENT: // with /table we pop the count 79 if (SimpleHtmlParser.equals("table", result)) { 80 if (hadPop) { 81 --tableCount; 82 } else { 83 ++tableCount; 84 } 85 } else if (tableCount == 1) { 86 if (SimpleHtmlParser.equals("tr", result)) { 87 if (hadPop) { 88 addHelpMessages(); 89 } 90 column = 0; 91 } else if (SimpleHtmlParser.equals("td", result)) { 92 if (hadPop) { 93 inContent = false; 94 ++column; 95 } else { 96 inContent = true; 97 continue main; // skip adding 98 } 99 } 100 } 101 break; 102 case ELEMENT_POP: 103 hadPop = true; 104 break; 105 case ELEMENT_END: 106 hadPop = false; 107 break; 108 case DONE: 109 break main; 110 } 111 if (inContent) { 112 SimpleHtmlParser.writeResult(x, result, currentColumn[column]); 113 } 114 } 115 116 in.close(); 117 } catch (IOException e) { 118 System.err.println("Can't initialize help text"); 119 } 120 } 121 122 /** 123 * Get message corresponding to a key out of the file set on this object. For many files, the 124 * key will be an xpath, but it doesn't have to be. Note that <i>all</i> of pairs of 125 * <keyRegex,htmlText> where the key matches keyRegex will be concatenated together in order to 126 * get the result. 127 * 128 * @param key 129 * @return 130 */ find(String key)131 public String find(String key) { 132 return find(key, DEFAULT_HEADER_PATTERN); 133 } 134 135 /** 136 * Get message corresponding to a key out of the file set on this object. For many files, the 137 * key will be an xpath, but it doesn't have to be. Note that <i>all</i> of pairs of 138 * <keyRegex,htmlText> where the key matches keyRegex will be concatenated together in order to 139 * get the result. 140 * 141 * @param key 142 * @param addHeader true if you want a header formed by looking at all the hN elements. 143 * @return 144 */ find(String key, MessageFormat headerPattern)145 public String find(String key, MessageFormat headerPattern) { 146 StringBuilder header = new StringBuilder(); 147 StringBuilder result = new StringBuilder(); 148 int keyCount = 0; 149 for (int i = 0; i < keys.size(); ++i) { 150 if (keys.get(i).reset(key).matches()) { 151 if (result.length() != 0) { 152 result.append(CldrUtility.LINE_SEPARATOR); 153 } 154 String value = values.get(i); 155 if (headerPattern != null) { 156 HEADER_HTML.reset(value); 157 int lastEnd = 0; 158 StringBuilder newValue = new StringBuilder(); 159 while (HEADER_HTML.find()) { 160 String contents = HEADER_HTML.group(1); 161 if (contents.contains("<")) { 162 continue; // disallow other formatting 163 } 164 String bookmark = "HM_" + CLEANUP_BOOKMARK.reset(contents).replaceAll("_"); 165 keyCount++; 166 if (header.length() > 0) { 167 header.append(" | "); 168 } 169 header.append("<a href='#") 170 .append(bookmark) 171 .append("'>") 172 .append(contents) 173 .append("</a>"); 174 newValue.append(value.substring(lastEnd, HEADER_HTML.start(1))); 175 newValue.append("<a name='") 176 .append(bookmark) 177 .append("'>") 178 .append(contents) 179 .append("</a>"); 180 lastEnd = HEADER_HTML.end(1); 181 } 182 newValue.append(value.substring(lastEnd)); 183 value = newValue.toString(); 184 } 185 result.append(value); 186 } 187 } 188 if (result.length() != 0) { 189 if (keyCount > 1) { 190 result.insert(0, headerPattern.format(new Object[] {header.toString()})); 191 } 192 return result.toString(); 193 } 194 return null; 195 } 196 addHelpMessages()197 private void addHelpMessages() { 198 if (column == 2) { // must have two columns 199 try { 200 // remove the first character and the last two characters, since the are >....</ 201 String key = currentColumn[0].substring(1, currentColumn[0].length() - 2).trim(); 202 String value = currentColumn[1].substring(1, currentColumn[1].length() - 2).trim(); 203 if (ExampleGenerator.DEBUG_SHOW_HELP) { 204 System.out.println("{" + key + "} => {" + value + "}"); 205 } 206 Matcher m = 207 Pattern.compile( 208 TransliteratorUtilities.fromHTML.transliterate(key), 209 Pattern.COMMENTS) 210 .matcher(""); 211 keys.add(m); 212 values.add(value); 213 } catch (RuntimeException e) { 214 System.err.println("Help file has illegal regex: " + currentColumn[0]); 215 } 216 } 217 currentColumn[0].setLength(0); 218 currentColumn[1].setLength(0); 219 column = 0; 220 } 221 getChartMessages(String xpath)222 public static String getChartMessages(String xpath) { 223 synchronized (HelpMessages.class) { 224 if (HelpMessages.helpMessages == null) { 225 HelpMessages.helpMessages = new HelpMessages("chart_messages.html"); 226 } 227 } 228 return HelpMessages.helpMessages.find(xpath); 229 // if (xpath.contains("/exemplarCharacters")) { 230 // result = "The standard exemplar characters are those used in customary writing ([a-z] for 231 // English; " 232 // + "the auxiliary characters are used in foreign words found in typical magazines, 233 // newspapers, &c.; " 234 // + "currency auxilliary characters are those used in currency symbols, like 'US$ 1,234'. 235 // "; 236 // } 237 // return result == null ? null : TransliteratorUtilities.toHTML.transliterate(result); 238 } 239 } 240