1 /* 2 ********************************************************************** 3 * Copyright (c) 2002-2004, International Business Machines 4 * Corporation and others. All Rights Reserved. 5 ********************************************************************** 6 * Author: Mark Davis 7 ********************************************************************** 8 */ 9 package org.unicode.cldr.util; 10 11 import java.io.FileInputStream; 12 import java.io.IOException; 13 import java.io.InputStream; 14 import java.io.InputStreamReader; 15 import java.io.Reader; 16 import java.nio.charset.Charset; 17 import java.util.ArrayList; 18 import java.util.List; 19 import java.util.Stack; 20 21 import org.xml.sax.Attributes; 22 import org.xml.sax.ContentHandler; 23 import org.xml.sax.ErrorHandler; 24 import org.xml.sax.InputSource; 25 import org.xml.sax.Locator; 26 import org.xml.sax.SAXException; 27 import org.xml.sax.SAXParseException; 28 import org.xml.sax.XMLReader; 29 import org.xml.sax.ext.DeclHandler; 30 import org.xml.sax.ext.LexicalHandler; 31 import org.xml.sax.helpers.XMLReaderFactory; 32 33 import com.google.common.base.Function; 34 35 /** 36 * Convenience class to make reading XML data files easier. The main method is read(); 37 * This is meant for XML data files, so the contents of elements must either be all other elements, or 38 * just text. It is thus not suitable for XML files with MIXED content; 39 * all text content in a mixed element is discarded. 40 * 41 * @author davis 42 */ 43 public class XMLFileReader { 44 static final boolean SHOW_ALL = false; 45 /** 46 * Handlers to use in read() 47 */ 48 public static int CONTENT_HANDLER = 1, ERROR_HANDLER = 2, LEXICAL_HANDLER = 4, DECLARATION_HANDLER = 8; 49 50 private MyContentHandler DEFAULT_DECLHANDLER = new MyContentHandler(); 51 private SimpleHandler simpleHandler; 52 53 public static class SimpleHandler { handlePathValue(String path, String value)54 public void handlePathValue(String path, String value) { 55 }; 56 handleComment(String path, String comment)57 public void handleComment(String path, String comment) { 58 }; 59 handleElementDecl(String name, String model)60 public void handleElementDecl(String name, String model) { 61 }; 62 handleAttributeDecl(String eName, String aName, String type, String mode, String value)63 public void handleAttributeDecl(String eName, String aName, String type, String mode, String value) { 64 }; 65 handleEndDtd()66 public void handleEndDtd() { 67 } 68 handleStartDtd(String name, String publicId, String systemId)69 public void handleStartDtd(String name, String publicId, String systemId) { 70 }; 71 } 72 setHandler(SimpleHandler simpleHandler)73 public XMLFileReader setHandler(SimpleHandler simpleHandler) { 74 this.simpleHandler = simpleHandler; 75 return this; 76 } 77 78 /** 79 * Read an XML file. Return a list of alternating items, where the even items are the paths, 80 * and the odd ones are values. The order of the elements matches what was in the file. 81 * 82 * @param fileName 83 * file to open 84 * @param handlers 85 * a set of values for the handlers to use, eg CONTENT_HANDLER | ERROR_HANDLER 86 * @param validating 87 * if a validating parse is requested 88 * @return list of alternating values. 89 */ read(String fileName, int handlers, boolean validating)90 public XMLFileReader read(String fileName, int handlers, boolean validating) { 91 try { 92 InputStream fis = new FileInputStream(fileName); 93 fis = new FilterBomInputStream(fis); 94 return read(fileName, fis, handlers, validating); 95 } catch (IOException e) { 96 throw (IllegalArgumentException) new IllegalArgumentException("Can't read " + fileName).initCause(e); 97 } 98 } 99 100 /** 101 * read from a Stream 102 * @param fileName 103 * @param handlers 104 * @param validating 105 * @param fis 106 * @return 107 */ read(String fileName, InputStream fis, int handlers, boolean validating)108 public XMLFileReader read(String fileName, InputStream fis, int handlers, boolean validating) { 109 return read(fileName, new InputStreamReader(fis, Charset.forName("UTF-8")), handlers, validating); 110 } 111 112 /** 113 * read from a CLDR resource 114 * @param fileName 115 * @param handlers 116 * @param validating 117 * @param fis 118 * @see CldrUtility#getInputStream(String) 119 * @return 120 */ readCLDRResource(String resName, int handlers, boolean validating)121 public XMLFileReader readCLDRResource(String resName, int handlers, boolean validating) { 122 123 return read(resName, CldrUtility.getInputStream(resName), handlers, validating); 124 } 125 126 /** 127 * read from an arbitrary 128 * @param fileName 129 * @param handlers 130 * @param validating 131 * @param fis 132 * @see CldrUtility#getInputStream(String) 133 * @return 134 */ read(String resName, Class<?> callingClass, int handlers, boolean validating)135 public XMLFileReader read(String resName, Class<?> callingClass, int handlers, boolean validating) { 136 137 return read(resName, CldrUtility.getInputStream(callingClass, resName), handlers, validating); 138 } 139 read(String systemID, Reader reader, int handlers, boolean validating)140 public XMLFileReader read(String systemID, Reader reader, int handlers, boolean validating) { 141 try { 142 XMLReader xmlReader = createXMLReader(validating); 143 DEFAULT_DECLHANDLER.reset(); 144 if ((handlers & CONTENT_HANDLER) != 0) { 145 xmlReader.setContentHandler(DEFAULT_DECLHANDLER); 146 } 147 if ((handlers & ERROR_HANDLER) != 0) { 148 xmlReader.setErrorHandler(DEFAULT_DECLHANDLER); 149 } 150 if ((handlers & LEXICAL_HANDLER) != 0) { 151 xmlReader.setProperty("http://xml.org/sax/properties/lexical-handler", DEFAULT_DECLHANDLER); 152 } 153 if ((handlers & DECLARATION_HANDLER) != 0) { 154 xmlReader.setProperty("http://xml.org/sax/properties/declaration-handler", DEFAULT_DECLHANDLER); 155 } 156 InputSource is = new InputSource(reader); 157 is.setSystemId(systemID); 158 try { 159 xmlReader.parse(is); 160 } catch (AbortException e) { 161 } // ok 162 reader.close(); 163 return this; 164 } catch (SAXParseException e) { 165 throw (IllegalArgumentException) new IllegalArgumentException("Can't read " + systemID + "\tline:\t" 166 + e.getLineNumber()).initCause(e); 167 } catch (SAXException e) { 168 throw (IllegalArgumentException) new IllegalArgumentException("Can't read " + systemID).initCause(e); 169 } catch (IOException e) { 170 throw (IllegalArgumentException) new IllegalArgumentException("Can't read " + systemID).initCause(e); 171 } 172 } 173 174 private class MyContentHandler implements ContentHandler, LexicalHandler, DeclHandler, ErrorHandler { 175 StringBuffer chars = new StringBuffer(); 176 StringBuffer commentChars = new StringBuffer(); 177 Stack<String> startElements = new Stack<String>(); 178 StringBuffer tempPath = new StringBuffer(); 179 boolean lastIsStart = false; 180 reset()181 public void reset() { 182 chars.setLength(0); 183 tempPath = new StringBuffer("/"); 184 startElements.clear(); 185 startElements.push("/"); 186 } 187 characters(char[] ch, int start, int length)188 public void characters(char[] ch, int start, int length) throws SAXException { 189 if (lastIsStart) chars.append(ch, start, length); 190 } 191 startElement(String namespaceURI, String localName, String qName, Attributes atts)192 public void startElement(String namespaceURI, String localName, String qName, Attributes atts) 193 throws SAXException { 194 tempPath.setLength(0); 195 tempPath.append(startElements.peek()).append('/').append(qName); 196 for (int i = 0; i < atts.getLength(); ++i) { 197 tempPath.append("[@").append(atts.getQName(i)).append("=\"").append(atts.getValue(i).replace('"', '\'')).append("\"]"); 198 } 199 startElements.push(tempPath.toString()); 200 chars.setLength(0); // clear garbage 201 lastIsStart = true; 202 } 203 endElement(String namespaceURI, String localName, String qName)204 public void endElement(String namespaceURI, String localName, String qName) throws SAXException { 205 String startElement = (String) startElements.pop(); 206 if (lastIsStart) { 207 // System.out.println(startElement + ":" + chars); 208 simpleHandler.handlePathValue(startElement, chars.toString()); 209 } 210 chars.setLength(0); 211 lastIsStart = false; 212 } 213 startDTD(String name, String publicId, String systemId)214 public void startDTD(String name, String publicId, String systemId) throws SAXException { 215 if (SHOW_ALL) Log.logln("startDTD name: " + name 216 + ", publicId: " + publicId 217 + ", systemId: " + systemId); 218 simpleHandler.handleStartDtd(name, publicId, systemId); 219 } 220 endDTD()221 public void endDTD() throws SAXException { 222 if (SHOW_ALL) Log.logln("endDTD"); 223 simpleHandler.handleEndDtd(); 224 } 225 comment(char[] ch, int start, int length)226 public void comment(char[] ch, int start, int length) throws SAXException { 227 if (SHOW_ALL) Log.logln(" comment " + new String(ch, start, length)); 228 commentChars.append(ch, start, length); 229 simpleHandler.handleComment((String) startElements.peek(), commentChars.toString()); 230 commentChars.setLength(0); 231 } 232 elementDecl(String name, String model)233 public void elementDecl(String name, String model) throws SAXException { 234 simpleHandler.handleElementDecl(name, model); 235 } 236 attributeDecl(String eName, String aName, String type, String mode, String value)237 public void attributeDecl(String eName, String aName, String type, String mode, String value) 238 throws SAXException { 239 simpleHandler.handleAttributeDecl(eName, aName, type, mode, value); 240 } 241 242 // ==== The following are just for debuggin ===== 243 startDocument()244 public void startDocument() throws SAXException { 245 if (SHOW_ALL) Log.logln("startDocument"); 246 } 247 ignorableWhitespace(char[] ch, int start, int length)248 public void ignorableWhitespace(char[] ch, int start, int length) throws SAXException { 249 if (SHOW_ALL) Log.logln("ignorableWhitespace length: " + length); 250 } 251 endDocument()252 public void endDocument() throws SAXException { 253 if (SHOW_ALL) Log.logln("endDocument"); 254 } 255 internalEntityDecl(String name, String value)256 public void internalEntityDecl(String name, String value) throws SAXException { 257 if (SHOW_ALL) Log.logln("Internal Entity\t" + name + "\t" + value); 258 } 259 externalEntityDecl(String name, String publicId, String systemId)260 public void externalEntityDecl(String name, String publicId, String systemId) throws SAXException { 261 if (SHOW_ALL) Log.logln("Internal Entity\t" + name + "\t" + publicId + "\t" + systemId); 262 } 263 notationDecl(String name, String publicId, String systemId)264 public void notationDecl(String name, String publicId, String systemId) { 265 if (SHOW_ALL) Log.logln("notationDecl: " + name 266 + ", " + publicId 267 + ", " + systemId); 268 } 269 processingInstruction(String target, String data)270 public void processingInstruction(String target, String data) 271 throws SAXException { 272 if (SHOW_ALL) Log.logln("processingInstruction: " + target + ", " + data); 273 } 274 skippedEntity(String name)275 public void skippedEntity(String name) 276 throws SAXException { 277 if (SHOW_ALL) Log.logln("skippedEntity: " + name); 278 } 279 unparsedEntityDecl(String name, String publicId, String systemId, String notationName)280 public void unparsedEntityDecl(String name, String publicId, 281 String systemId, String notationName) { 282 if (SHOW_ALL) Log.logln("unparsedEntityDecl: " + name 283 + ", " + publicId 284 + ", " + systemId 285 + ", " + notationName); 286 } 287 setDocumentLocator(Locator locator)288 public void setDocumentLocator(Locator locator) { 289 if (SHOW_ALL) Log.logln("setDocumentLocator Locator " + locator); 290 } 291 startPrefixMapping(String prefix, String uri)292 public void startPrefixMapping(String prefix, String uri) throws SAXException { 293 if (SHOW_ALL) Log.logln("startPrefixMapping prefix: " + prefix + 294 ", uri: " + uri); 295 } 296 endPrefixMapping(String prefix)297 public void endPrefixMapping(String prefix) throws SAXException { 298 if (SHOW_ALL) Log.logln("endPrefixMapping prefix: " + prefix); 299 } 300 startEntity(String name)301 public void startEntity(String name) throws SAXException { 302 if (SHOW_ALL) Log.logln("startEntity name: " + name); 303 } 304 endEntity(String name)305 public void endEntity(String name) throws SAXException { 306 if (SHOW_ALL) Log.logln("endEntity name: " + name); 307 } 308 startCDATA()309 public void startCDATA() throws SAXException { 310 if (SHOW_ALL) Log.logln("startCDATA"); 311 } 312 endCDATA()313 public void endCDATA() throws SAXException { 314 if (SHOW_ALL) Log.logln("endCDATA"); 315 } 316 317 /* 318 * (non-Javadoc) 319 * 320 * @see org.xml.sax.ErrorHandler#error(org.xml.sax.SAXParseException) 321 */ error(SAXParseException exception)322 public void error(SAXParseException exception) throws SAXException { 323 if (SHOW_ALL) Log.logln("error: " + showSAX(exception)); 324 throw exception; 325 } 326 327 /* 328 * (non-Javadoc) 329 * 330 * @see org.xml.sax.ErrorHandler#fatalError(org.xml.sax.SAXParseException) 331 */ fatalError(SAXParseException exception)332 public void fatalError(SAXParseException exception) throws SAXException { 333 if (SHOW_ALL) Log.logln("fatalError: " + showSAX(exception)); 334 throw exception; 335 } 336 337 /* 338 * (non-Javadoc) 339 * 340 * @see org.xml.sax.ErrorHandler#warning(org.xml.sax.SAXParseException) 341 */ warning(SAXParseException exception)342 public void warning(SAXParseException exception) throws SAXException { 343 if (SHOW_ALL) Log.logln("warning: " + showSAX(exception)); 344 throw exception; 345 } 346 } 347 348 static final class AbortException extends RuntimeException { 349 private static final long serialVersionUID = 1L; 350 } 351 352 /** 353 * Show a SAX exception in a readable form. 354 */ showSAX(SAXParseException exception)355 public static String showSAX(SAXParseException exception) { 356 return exception.getMessage() 357 + ";\t SystemID: " + exception.getSystemId() 358 + ";\t PublicID: " + exception.getPublicId() 359 + ";\t LineNumber: " + exception.getLineNumber() 360 + ";\t ColumnNumber: " + exception.getColumnNumber(); 361 } 362 createXMLReader(boolean validating)363 public static XMLReader createXMLReader(boolean validating) { 364 // weiv 07/20/2007: The laundry list below is somewhat obsolete 365 // I have moved the system's default parser (instantiated when "" is 366 // passed) to the top, so that we will always use that. I have also 367 // removed "org.apache.crimson.parser.XMLReaderImpl" as this one gets 368 // confused regarding UTF-8 encoding name. 369 String[] testList = { 370 System.getProperty("CLDR_DEFAULT_SAX_PARSER", ""), // defaults to "", system default. 371 "org.apache.xerces.parsers.SAXParser", 372 "gnu.xml.aelfred2.XmlReader", 373 "com.bluecast.xml.Piccolo", 374 "oracle.xml.parser.v2.SAXParser" 375 }; 376 XMLReader result = null; 377 for (int i = 0; i < testList.length; ++i) { 378 try { 379 result = (testList[i].length() != 0) 380 ? XMLReaderFactory.createXMLReader(testList[i]) 381 : XMLReaderFactory.createXMLReader(); 382 result.setFeature("http://xml.org/sax/features/validation", validating); 383 break; 384 } catch (SAXException e1) { 385 } 386 } 387 if (result == null) 388 throw new NoClassDefFoundError("No SAX parser is available, or unable to set validation correctly"); 389 try { 390 result.setEntityResolver(new CachingEntityResolver()); 391 } catch (Throwable e) { 392 System.err 393 .println("WARNING: Can't set caching entity resolver - error " 394 + e.toString()); 395 e.printStackTrace(); 396 } 397 return result; 398 } 399 400 static final class DebuggingInputStream extends InputStream { 401 InputStream contents; 402 close()403 public void close() throws IOException { 404 contents.close(); 405 } 406 DebuggingInputStream(InputStream fis)407 public DebuggingInputStream(InputStream fis) { 408 contents = fis; 409 } 410 read()411 public int read() throws IOException { 412 int x = contents.read(); 413 System.out.println(Integer.toHexString(x) + ","); 414 return x; 415 } 416 } 417 418 static final class FilterBomInputStream extends InputStream { 419 InputStream contents; 420 boolean first = true; 421 close()422 public void close() throws IOException { 423 contents.close(); 424 } 425 FilterBomInputStream(InputStream fis)426 public FilterBomInputStream(InputStream fis) { 427 contents = fis; 428 } 429 read()430 public int read() throws IOException { 431 int x = contents.read(); 432 if (first) { 433 first = false; 434 // 0xEF,0xBB,0xBF 435 // SKIP bom 436 if (x == 0xEF) { 437 int y = contents.read(); 438 if (y == 0xBB) { 439 int z = contents.read(); 440 if (z == 0xBF) { 441 x = contents.read(); 442 } 443 } 444 } 445 } 446 return x; 447 } 448 } 449 loadPathValues(String filename, List<Pair<String, String>> data, boolean validating)450 public static List<Pair<String, String>> loadPathValues(String filename, List<Pair<String, String>> data, boolean validating) { 451 return loadPathValues(filename, data, validating, false); 452 } 453 loadPathValues(String filename, List<Pair<String, String>> data, boolean validating, boolean full)454 public static List<Pair<String, String>> loadPathValues(String filename, List<Pair<String, String>> data, boolean validating, boolean full) { 455 return loadPathValues(filename, data, validating, full, null); 456 } 457 loadPathValues(String filename, List<Pair<String, String>> data, boolean validating, boolean full, Function<String, String> valueFilter)458 public static List<Pair<String, String>> loadPathValues(String filename, List<Pair<String, String>> data, boolean validating, boolean full, 459 Function<String, String> valueFilter) { 460 try { 461 new XMLFileReader() 462 .setHandler(new PathValueListHandler(data, full, valueFilter)) 463 .read(filename, -1, validating); 464 return data; 465 } catch (Exception e) { 466 throw new IllegalArgumentException(filename, e); 467 } 468 } 469 470 static final class PathValueListHandler extends SimpleHandler { 471 List<Pair<String, String>> data; 472 boolean full; 473 private Function<String, String> valueFilter; 474 PathValueListHandler(List<Pair<String, String>> data, boolean full, Function<String, String> valueFilter)475 public PathValueListHandler(List<Pair<String, String>> data, boolean full, Function<String, String> valueFilter) { 476 super(); 477 this.data = data != null ? data : new ArrayList<Pair<String, String>>(); 478 this.full = full; 479 this.valueFilter = valueFilter; 480 } 481 482 @Override handlePathValue(String path, String value)483 public void handlePathValue(String path, String value) { 484 if (valueFilter == null) { 485 data.add(Pair.of(path, value)); 486 } else { 487 String filteredValue = valueFilter.apply(value); 488 if (filteredValue != null) { 489 data.add(Pair.of(path, filteredValue)); 490 } 491 } 492 } 493 494 @Override handleComment(String path, String comment)495 public void handleComment(String path, String comment) { 496 if (!full || path.equals("/")) { 497 return; 498 } 499 data.add(Pair.of("!", comment)); 500 } 501 } 502 }