1 package org.unicode.cldr.test; 2 3 import java.io.BufferedReader; 4 import java.io.IOException; 5 import java.util.ArrayList; 6 import java.util.List; 7 import java.util.regex.Matcher; 8 import java.util.regex.Pattern; 9 10 import org.unicode.cldr.util.CldrUtility; 11 import org.unicode.cldr.util.PatternCache; 12 import org.unicode.cldr.util.SimpleHtmlParser; 13 import org.unicode.cldr.util.SimpleHtmlParser.Type; 14 import org.unicode.cldr.util.TransliteratorUtilities; 15 16 import com.ibm.icu.text.MessageFormat; 17 18 /** 19 * Private class to get the messages from a help file. 20 */ 21 public class HelpMessages { 22 private static final Matcher CLEANUP_BOOKMARK = PatternCache.get("[^a-zA-Z0-9]").matcher(""); 23 24 private static final MessageFormat DEFAULT_HEADER_PATTERN = new MessageFormat("<p>{0}</p>" 25 + CldrUtility.LINE_SEPARATOR); 26 27 private static final Matcher HEADER_HTML = PatternCache.get("<h[0-9]>(.*)</h[0-9]>").matcher(""); 28 29 List<Matcher> keys = new ArrayList<>(); 30 31 List<String> values = new ArrayList<>(); 32 33 enum Status { 34 BASE, BEFORE_CELL, IN_CELL, IN_INSIDE_TABLE 35 } 36 37 StringBuilder[] currentColumn = new StringBuilder[2]; 38 39 int column = 0; 40 41 private static HelpMessages helpMessages; 42 43 /** 44 * Create a HelpMessages object from a filename. 45 * The file has to be in the format of a table of <keyRegex,htmlText> pairs, 46 * where the key is a keyRegex expression and htmlText is arbitrary HTML text. For example: 47 * <p> 48 * {@link http://unicode.org/cldr/data/tools/java/org/unicode/cldr/util/data/chart_messages.html} is used for 49 * chart messages, where the key is the name of the chart. 50 * <p> 51 * {@link http://unicode.org/cldr/data/tools/java/org/unicode/cldr/util/data/test_help_messages.html} is used 52 * for help messages in the survey tool, where the key is an xpath. 53 * 54 * @param filename 55 */ HelpMessages(String filename)56 public HelpMessages(String filename) { 57 currentColumn[0] = new StringBuilder(); 58 currentColumn[1] = new StringBuilder(); 59 BufferedReader in; 60 try { 61 in = CldrUtility.getUTF8Data(filename); 62 int tableCount = 0; 63 64 boolean inContent = false; 65 // if the table level is 1 (we are in the main table), then we look for <td>...</td><td>...</td>. That 66 // means that we have column 1 and column 2. 67 68 SimpleHtmlParser simple = new SimpleHtmlParser().setReader(in); 69 StringBuilder result = new StringBuilder(); 70 boolean hadPop = false; 71 main: while (true) { 72 Type x = simple.next(result); 73 switch (x) { 74 case ELEMENT: // with /table we pop the count 75 if (SimpleHtmlParser.equals("table", result)) { 76 if (hadPop) { 77 --tableCount; 78 } else { 79 ++tableCount; 80 } 81 } else if (tableCount == 1) { 82 if (SimpleHtmlParser.equals("tr", result)) { 83 if (hadPop) { 84 addHelpMessages(); 85 } 86 column = 0; 87 } else if (SimpleHtmlParser.equals("td", result)) { 88 if (hadPop) { 89 inContent = false; 90 ++column; 91 } else { 92 inContent = true; 93 continue main; // skip adding 94 } 95 } 96 } 97 break; 98 case ELEMENT_POP: 99 hadPop = true; 100 break; 101 case ELEMENT_END: 102 hadPop = false; 103 break; 104 case DONE: 105 break main; 106 } 107 if (inContent) { 108 SimpleHtmlParser.writeResult(x, result, currentColumn[column]); 109 } 110 } 111 112 in.close(); 113 } catch (IOException e) { 114 System.err.println("Can't initialize help text"); 115 } 116 } 117 118 /** 119 * Get message corresponding to a key out of the file set on this object. 120 * For many files, the key will be an xpath, but it doesn't have to be. 121 * Note that <i>all</i> of pairs of <keyRegex,htmlText> where the key matches keyRegex 122 * will be concatenated together in order to get the result. 123 * 124 * @param key 125 * @return 126 */ find(String key)127 public String find(String key) { 128 return find(key, DEFAULT_HEADER_PATTERN); 129 } 130 131 /** 132 * Get message corresponding to a key out of the file set on this object. 133 * For many files, the key will be an xpath, but it doesn't have to be. 134 * Note that <i>all</i> of pairs of <keyRegex,htmlText> where the key matches keyRegex 135 * will be concatenated together in order to get the result. 136 * 137 * @param key 138 * @param addHeader 139 * true if you want a header formed by looking at all the hN elements. 140 * @return 141 */ find(String key, MessageFormat headerPattern)142 public String find(String key, MessageFormat headerPattern) { 143 StringBuilder header = new StringBuilder(); 144 StringBuilder result = new StringBuilder(); 145 int keyCount = 0; 146 for (int i = 0; i < keys.size(); ++i) { 147 if (keys.get(i).reset(key).matches()) { 148 if (result.length() != 0) { 149 result.append(CldrUtility.LINE_SEPARATOR); 150 } 151 String value = values.get(i); 152 if (headerPattern != null) { 153 HEADER_HTML.reset(value); 154 int lastEnd = 0; 155 StringBuilder newValue = new StringBuilder(); 156 while (HEADER_HTML.find()) { 157 String contents = HEADER_HTML.group(1); 158 if (contents.contains("<")) { 159 continue; // disallow other formatting 160 } 161 String bookmark = "HM_" + CLEANUP_BOOKMARK.reset(contents).replaceAll("_"); 162 keyCount++; 163 if (header.length() > 0) { 164 header.append(" | "); 165 } 166 header.append("<a href='#").append(bookmark).append("'>").append(contents).append("</a>"); 167 newValue.append(value.substring(lastEnd, HEADER_HTML.start(1))); 168 newValue.append("<a name='").append(bookmark).append("'>").append(contents).append("</a>"); 169 lastEnd = HEADER_HTML.end(1); 170 } 171 newValue.append(value.substring(lastEnd)); 172 value = newValue.toString(); 173 } 174 result.append(value); 175 } 176 } 177 if (result.length() != 0) { 178 if (keyCount > 1) { 179 result.insert(0, headerPattern.format(new Object[] { header.toString() })); 180 } 181 return result.toString(); 182 } 183 return null; 184 } 185 addHelpMessages()186 private void addHelpMessages() { 187 if (column == 2) { // must have two columns 188 try { 189 // remove the first character and the last two characters, since the are >....</ 190 String key = currentColumn[0].substring(1, currentColumn[0].length() - 2).trim(); 191 String value = currentColumn[1].substring(1, currentColumn[1].length() - 2).trim(); 192 if (ExampleGenerator.DEBUG_SHOW_HELP) { 193 System.out.println("{" + key + "} => {" + value + "}"); 194 } 195 Matcher m = Pattern.compile(TransliteratorUtilities.fromHTML.transliterate(key), Pattern.COMMENTS) 196 .matcher(""); 197 keys.add(m); 198 values.add(value); 199 } catch (RuntimeException e) { 200 System.err.println("Help file has illegal regex: " + currentColumn[0]); 201 } 202 } 203 currentColumn[0].setLength(0); 204 currentColumn[1].setLength(0); 205 column = 0; 206 } 207 getChartMessages(String xpath)208 public static String getChartMessages(String xpath) { 209 synchronized (HelpMessages.class) { 210 if (HelpMessages.helpMessages == null) { 211 HelpMessages.helpMessages = new HelpMessages("chart_messages.html"); 212 } 213 } 214 return HelpMessages.helpMessages.find(xpath); 215 // if (xpath.contains("/exemplarCharacters")) { 216 // result = "The standard exemplar characters are those used in customary writing ([a-z] for English; " 217 // + "the auxiliary characters are used in foreign words found in typical magazines, newspapers, &c.; " 218 // + "currency auxilliary characters are those used in currency symbols, like 'US$ 1,234'. "; 219 // } 220 // return result == null ? null : TransliteratorUtilities.toHTML.transliterate(result); 221 } 222 }