1 /* 2 ********************************************************************** 3 * Copyright (c) 2002-2004, International Business Machines 4 * Corporation and others. All Rights Reserved. 5 ********************************************************************** 6 * Author: Mark Davis 7 ********************************************************************** 8 */ 9 package org.unicode.cldr.util; 10 11 import java.io.FileInputStream; 12 import java.io.IOException; 13 import java.io.InputStream; 14 import java.io.InputStreamReader; 15 import java.io.Reader; 16 import java.nio.charset.Charset; 17 import java.util.ArrayList; 18 import java.util.List; 19 import java.util.Stack; 20 21 import org.xml.sax.Attributes; 22 import org.xml.sax.ContentHandler; 23 import org.xml.sax.ErrorHandler; 24 import org.xml.sax.InputSource; 25 import org.xml.sax.Locator; 26 import org.xml.sax.SAXException; 27 import org.xml.sax.SAXParseException; 28 import org.xml.sax.XMLReader; 29 import org.xml.sax.ext.DeclHandler; 30 import org.xml.sax.ext.LexicalHandler; 31 import org.xml.sax.helpers.XMLReaderFactory; 32 33 import com.google.common.base.Function; 34 import com.ibm.icu.util.ICUException; 35 import com.ibm.icu.util.ICUUncheckedIOException; 36 37 /** 38 * Convenience class to make reading XML data files easier. The main method is read(); 39 * This is meant for XML data files, so the contents of elements must either be all other elements, or 40 * just text. It is thus not suitable for XML files with MIXED content; 41 * all text content in a mixed element is discarded. 42 * 43 * @author davis 44 */ 45 public class XMLFileReader { 46 static final boolean SHOW_ALL = false; 47 /** 48 * Handlers to use in read() 49 */ 50 public static int CONTENT_HANDLER = 1, ERROR_HANDLER = 2, LEXICAL_HANDLER = 4, DECLARATION_HANDLER = 8; 51 52 private MyContentHandler DEFAULT_DECLHANDLER = new MyContentHandler(); 53 // TODO Add way to skip gathering value contents 54 // private ElementOnlyContentHandler ELEMENT_ONLY_DECLHANDLER = new ElementOnlyContentHandler(); 55 private SimpleHandler simpleHandler; 56 57 public static class SimpleHandler { handlePathValue(String path, String value)58 public void handlePathValue(String path, String value) { 59 } 60 handleComment(String path, String comment)61 public void handleComment(String path, String comment) { 62 } 63 handleElementDecl(String name, String model)64 public void handleElementDecl(String name, String model) { 65 } 66 handleAttributeDecl(String eName, String aName, String type, String mode, String value)67 public void handleAttributeDecl(String eName, String aName, String type, String mode, String value) { 68 } 69 handleEndDtd()70 public void handleEndDtd() { 71 } 72 handleStartDtd(String name, String publicId, String systemId)73 public void handleStartDtd(String name, String publicId, String systemId) { 74 } 75 } 76 setHandler(SimpleHandler simpleHandler)77 public XMLFileReader setHandler(SimpleHandler simpleHandler) { 78 this.simpleHandler = simpleHandler; 79 return this; 80 } 81 82 /** 83 * Read an XML file. The order of the elements matches what was in the file. 84 * 85 * @param fileName 86 * file to open 87 * @param handlers 88 * a set of values for the handlers to use, eg CONTENT_HANDLER | ERROR_HANDLER 89 * @param validating 90 * if a validating parse is requested 91 * @return list of alternating values. 92 */ read(String fileName, int handlers, boolean validating)93 public XMLFileReader read(String fileName, int handlers, boolean validating) { 94 try (InputStream fis0 = new FileInputStream(fileName); 95 InputStream fis = new StripUTF8BOMInputStream(fis0); 96 ) { 97 return read(fileName, fis, handlers, validating); 98 } catch (IOException e) { 99 throw (IllegalArgumentException) new IllegalArgumentException("Can't read " + fileName).initCause(e); 100 } 101 } 102 103 /** 104 * read from a Stream 105 * @param fileName 106 * @param handlers 107 * @param validating 108 * @param fis 109 * @return 110 */ read(String fileName, InputStream fis, int handlers, boolean validating)111 public XMLFileReader read(String fileName, InputStream fis, int handlers, boolean validating) { 112 try (InputStreamReader inputStreamReader = new InputStreamReader(fis, Charset.forName("UTF-8"))) { 113 return read(fileName, inputStreamReader, handlers, validating); 114 } catch (IOException e) { 115 throw new ICUUncheckedIOException(e); 116 } 117 } 118 119 /** 120 * read from a CLDR resource 121 * @param fileName 122 * @param handlers 123 * @param validating 124 * @param fis 125 * @see CldrUtility#getInputStream(String) 126 * @return 127 */ readCLDRResource(String resName, int handlers, boolean validating)128 public XMLFileReader readCLDRResource(String resName, int handlers, boolean validating) { 129 try (InputStream inputStream = CldrUtility.getInputStream(resName)) { 130 return read(resName, inputStream, handlers, validating); 131 } catch (IOException e) { 132 throw new ICUUncheckedIOException(e); 133 } 134 } 135 136 /** 137 * read from an arbitrary 138 * @param fileName 139 * @param handlers 140 * @param validating 141 * @param fis 142 * @see CldrUtility#getInputStream(String) 143 * @return 144 */ read(String resName, Class<?> callingClass, int handlers, boolean validating)145 public XMLFileReader read(String resName, Class<?> callingClass, int handlers, boolean validating) { 146 try (InputStream inputStream = CldrUtility.getInputStream(callingClass, resName)) { 147 return read(resName, inputStream, handlers, validating); 148 } catch (IOException e) { 149 throw new ICUUncheckedIOException(e); 150 } 151 } 152 read(String systemID, Reader reader, int handlers, boolean validating)153 public XMLFileReader read(String systemID, Reader reader, int handlers, boolean validating) { 154 read(systemID, reader, handlers, validating, DEFAULT_DECLHANDLER.reset()); 155 return this; 156 } 157 read(String systemID, Reader reader, int handlers, boolean validating, AllHandler allHandler)158 public static void read(String systemID, Reader reader, int handlers, boolean validating, AllHandler allHandler) { 159 try { 160 XMLReader xmlReader = createXMLReader(validating); 161 if ((handlers & CONTENT_HANDLER) != 0) { 162 xmlReader.setContentHandler(allHandler); 163 } 164 if ((handlers & ERROR_HANDLER) != 0) { 165 xmlReader.setErrorHandler(allHandler); 166 } 167 if ((handlers & LEXICAL_HANDLER) != 0) { 168 xmlReader.setProperty("http://xml.org/sax/properties/lexical-handler", allHandler); 169 } 170 if ((handlers & DECLARATION_HANDLER) != 0) { 171 xmlReader.setProperty("http://xml.org/sax/properties/declaration-handler", allHandler); 172 } 173 InputSource is = new InputSource(reader); 174 is.setSystemId(systemID); 175 try { 176 xmlReader.parse(is); 177 } catch (AbortException e) { 178 } // ok 179 reader.close(); 180 } catch (SAXParseException e) { 181 throw (IllegalArgumentException) new IllegalArgumentException("Can't read " + systemID + "\tline:\t" 182 + e.getLineNumber()).initCause(e); 183 } catch (SAXException e) { 184 throw (IllegalArgumentException) new IllegalArgumentException("Can't read " + systemID).initCause(e); 185 } catch (IOException e) { 186 throw (IllegalArgumentException) new IllegalArgumentException("Can't read " + systemID).initCause(e); 187 } 188 } 189 190 public interface AllHandler extends ContentHandler, LexicalHandler, DeclHandler, ErrorHandler { 191 192 } 193 194 195 /** Basis for handlers that provides for logging, with no actions on methods 196 */ 197 static public class LoggingHandler implements AllHandler { 198 @Override startDocument()199 public void startDocument() throws SAXException { 200 if (SHOW_ALL) Log.logln("startDocument"); 201 } 202 203 @Override characters(char[] ch, int start, int length)204 public void characters(char[] ch, int start, int length) throws SAXException { 205 if (SHOW_ALL) Log.logln("characters"); 206 } 207 208 @Override startElement(String namespaceURI, String localName, String qName, Attributes atts)209 public void startElement(String namespaceURI, String localName, String qName, Attributes atts) 210 throws SAXException { 211 if (SHOW_ALL) Log.logln("startElement"); 212 } 213 214 @Override endElement(String namespaceURI, String localName, String qName)215 public void endElement(String namespaceURI, String localName, String qName) throws SAXException { 216 if (SHOW_ALL) Log.logln("endElement"); 217 } 218 219 @Override startDTD(String name, String publicId, String systemId)220 public void startDTD(String name, String publicId, String systemId) throws SAXException { 221 if (SHOW_ALL) Log.logln("startDTD"); 222 } 223 224 @Override endDTD()225 public void endDTD() throws SAXException { 226 if (SHOW_ALL) Log.logln("endDTD"); 227 } 228 229 @Override comment(char[] ch, int start, int length)230 public void comment(char[] ch, int start, int length) throws SAXException { 231 if (SHOW_ALL) Log.logln(" comment " + new String(ch, start, length)); 232 } 233 234 @Override elementDecl(String name, String model)235 public void elementDecl(String name, String model) throws SAXException { 236 if (SHOW_ALL) Log.logln("elementDecl"); 237 } 238 239 @Override attributeDecl(String eName, String aName, String type, String mode, String value)240 public void attributeDecl(String eName, String aName, String type, String mode, String value) 241 throws SAXException { 242 if (SHOW_ALL) Log.logln("attributeDecl"); 243 } 244 245 @Override ignorableWhitespace(char[] ch, int start, int length)246 public void ignorableWhitespace(char[] ch, int start, int length) throws SAXException { 247 if (SHOW_ALL) Log.logln("ignorableWhitespace length: " + length); 248 } 249 250 @Override endDocument()251 public void endDocument() throws SAXException { 252 if (SHOW_ALL) Log.logln("endDocument"); 253 } 254 255 @Override internalEntityDecl(String name, String value)256 public void internalEntityDecl(String name, String value) throws SAXException { 257 if (SHOW_ALL) Log.logln("Internal Entity\t" + name + "\t" + value); 258 } 259 260 @Override externalEntityDecl(String name, String publicId, String systemId)261 public void externalEntityDecl(String name, String publicId, String systemId) throws SAXException { 262 if (SHOW_ALL) Log.logln("Internal Entity\t" + name + "\t" + publicId + "\t" + systemId); 263 } 264 notationDecl(String name, String publicId, String systemId)265 public void notationDecl(String name, String publicId, String systemId) { 266 if (SHOW_ALL) Log.logln("notationDecl: " + name 267 + ", " + publicId 268 + ", " + systemId); 269 } 270 271 @Override processingInstruction(String target, String data)272 public void processingInstruction(String target, String data) 273 throws SAXException { 274 if (SHOW_ALL) Log.logln("processingInstruction: " + target + ", " + data); 275 } 276 277 @Override skippedEntity(String name)278 public void skippedEntity(String name) 279 throws SAXException { 280 if (SHOW_ALL) Log.logln("skippedEntity: " + name); 281 } 282 unparsedEntityDecl(String name, String publicId, String systemId, String notationName)283 public void unparsedEntityDecl(String name, String publicId, 284 String systemId, String notationName) { 285 if (SHOW_ALL) Log.logln("unparsedEntityDecl: " + name 286 + ", " + publicId 287 + ", " + systemId 288 + ", " + notationName); 289 } 290 291 @Override setDocumentLocator(Locator locator)292 public void setDocumentLocator(Locator locator) { 293 if (SHOW_ALL) Log.logln("setDocumentLocator Locator " + locator); 294 } 295 296 @Override startPrefixMapping(String prefix, String uri)297 public void startPrefixMapping(String prefix, String uri) throws SAXException { 298 if (SHOW_ALL) Log.logln("startPrefixMapping prefix: " + prefix + 299 ", uri: " + uri); 300 } 301 302 @Override endPrefixMapping(String prefix)303 public void endPrefixMapping(String prefix) throws SAXException { 304 if (SHOW_ALL) Log.logln("endPrefixMapping prefix: " + prefix); 305 } 306 307 @Override startEntity(String name)308 public void startEntity(String name) throws SAXException { 309 if (SHOW_ALL) Log.logln("startEntity name: " + name); 310 } 311 312 @Override endEntity(String name)313 public void endEntity(String name) throws SAXException { 314 if (SHOW_ALL) Log.logln("endEntity name: " + name); 315 } 316 317 @Override startCDATA()318 public void startCDATA() throws SAXException { 319 if (SHOW_ALL) Log.logln("startCDATA"); 320 } 321 322 @Override endCDATA()323 public void endCDATA() throws SAXException { 324 if (SHOW_ALL) Log.logln("endCDATA"); 325 } 326 327 /* 328 * (non-Javadoc) 329 * 330 * @see org.xml.sax.ErrorHandler#error(org.xml.sax.SAXParseException) 331 */ 332 @Override error(SAXParseException exception)333 public void error(SAXParseException exception) throws SAXException { 334 if (SHOW_ALL) Log.logln("error: " + showSAX(exception)); 335 throw exception; 336 } 337 338 /* 339 * (non-Javadoc) 340 * 341 * @see org.xml.sax.ErrorHandler#fatalError(org.xml.sax.SAXParseException) 342 */ 343 @Override fatalError(SAXParseException exception)344 public void fatalError(SAXParseException exception) throws SAXException { 345 if (SHOW_ALL) Log.logln("fatalError: " + showSAX(exception)); 346 throw exception; 347 } 348 349 /* 350 * (non-Javadoc) 351 * 352 * @see org.xml.sax.ErrorHandler#warning(org.xml.sax.SAXParseException) 353 */ 354 @Override warning(SAXParseException exception)355 public void warning(SAXParseException exception) throws SAXException { 356 if (SHOW_ALL) Log.logln("warning: " + showSAX(exception)); 357 throw exception; 358 } 359 360 } 361 362 public class MyContentHandler extends LoggingHandler { 363 StringBuffer chars = new StringBuffer(); 364 StringBuffer commentChars = new StringBuffer(); 365 Stack<String> startElements = new Stack<>(); 366 StringBuffer tempPath = new StringBuffer(); 367 boolean lastIsStart = false; 368 reset()369 public MyContentHandler reset() { 370 chars.setLength(0); 371 tempPath = new StringBuffer("/"); 372 startElements.clear(); 373 startElements.push("/"); 374 return this; 375 } 376 377 @Override characters(char[] ch, int start, int length)378 public void characters(char[] ch, int start, int length) throws SAXException { 379 if (lastIsStart) chars.append(ch, start, length); 380 } 381 382 @Override startElement(String namespaceURI, String localName, String qName, Attributes atts)383 public void startElement(String namespaceURI, String localName, String qName, Attributes atts) 384 throws SAXException { 385 tempPath.setLength(0); 386 tempPath.append(startElements.peek()).append('/').append(qName); 387 for (int i = 0; i < atts.getLength(); ++i) { 388 tempPath.append("[@").append(atts.getQName(i)).append("=\"").append(atts.getValue(i).replace('"', '\'')).append("\"]"); 389 } 390 startElements.push(tempPath.toString()); 391 chars.setLength(0); // clear garbage 392 lastIsStart = true; 393 } 394 395 @Override endElement(String namespaceURI, String localName, String qName)396 public void endElement(String namespaceURI, String localName, String qName) throws SAXException { 397 String startElement = startElements.pop(); 398 if (lastIsStart) { 399 // System.out.println(startElement + ":" + chars); 400 simpleHandler.handlePathValue(startElement, chars.toString()); 401 } 402 chars.setLength(0); 403 lastIsStart = false; 404 } 405 406 @Override startDTD(String name, String publicId, String systemId)407 public void startDTD(String name, String publicId, String systemId) throws SAXException { 408 if (SHOW_ALL) Log.logln("startDTD name: " + name 409 + ", publicId: " + publicId 410 + ", systemId: " + systemId); 411 simpleHandler.handleStartDtd(name, publicId, systemId); 412 } 413 414 @Override endDTD()415 public void endDTD() throws SAXException { 416 if (SHOW_ALL) Log.logln("endDTD"); 417 simpleHandler.handleEndDtd(); 418 } 419 420 @Override comment(char[] ch, int start, int length)421 public void comment(char[] ch, int start, int length) throws SAXException { 422 if (SHOW_ALL) Log.logln(" comment " + new String(ch, start, length)); 423 commentChars.append(ch, start, length); 424 simpleHandler.handleComment(startElements.peek(), commentChars.toString()); 425 commentChars.setLength(0); 426 } 427 428 @Override elementDecl(String name, String model)429 public void elementDecl(String name, String model) throws SAXException { 430 simpleHandler.handleElementDecl(name, model); 431 } 432 433 @Override attributeDecl(String eName, String aName, String type, String mode, String value)434 public void attributeDecl(String eName, String aName, String type, String mode, String value) 435 throws SAXException { 436 simpleHandler.handleAttributeDecl(eName, aName, type, mode, value); 437 } 438 439 } 440 441 static final class AbortException extends RuntimeException { 442 private static final long serialVersionUID = 1L; 443 } 444 445 /** 446 * Show a SAX exception in a readable form. 447 */ showSAX(SAXParseException exception)448 public static String showSAX(SAXParseException exception) { 449 return exception.getMessage() 450 + ";\t SystemID: " + exception.getSystemId() 451 + ";\t PublicID: " + exception.getPublicId() 452 + ";\t LineNumber: " + exception.getLineNumber() 453 + ";\t ColumnNumber: " + exception.getColumnNumber(); 454 } 455 createXMLReader(boolean validating)456 public static XMLReader createXMLReader(boolean validating) { 457 // weiv 07/20/2007: The laundry list below is somewhat obsolete 458 // I have moved the system's default parser (instantiated when "" is 459 // passed) to the top, so that we will always use that. I have also 460 // removed "org.apache.crimson.parser.XMLReaderImpl" as this one gets 461 // confused regarding UTF-8 encoding name. 462 String[] testList = { 463 System.getProperty("CLDR_DEFAULT_SAX_PARSER", ""), // defaults to "", system default. 464 "org.apache.xerces.parsers.SAXParser", 465 "gnu.xml.aelfred2.XmlReader", 466 "com.bluecast.xml.Piccolo", 467 "oracle.xml.parser.v2.SAXParser" 468 }; 469 XMLReader result = null; 470 for (int i = 0; i < testList.length; ++i) { 471 try { 472 result = (testList[i].length() != 0) 473 ? XMLReaderFactory.createXMLReader(testList[i]) 474 : XMLReaderFactory.createXMLReader(); 475 result.setFeature("http://xml.org/sax/features/validation", validating); 476 break; 477 } catch (SAXException e1) { 478 } 479 } 480 if (result == null) 481 throw new NoClassDefFoundError("No SAX parser is available, or unable to set validation correctly"); 482 return result; 483 } 484 485 static final class DebuggingInputStream extends InputStream { 486 InputStream contents; 487 488 @Override close()489 public void close() throws IOException { 490 contents.close(); 491 } 492 DebuggingInputStream(InputStream fis)493 public DebuggingInputStream(InputStream fis) { 494 contents = fis; 495 } 496 497 @Override read()498 public int read() throws IOException { 499 int x = contents.read(); 500 System.out.println(Integer.toHexString(x) + ","); 501 return x; 502 } 503 } 504 505 // class StripUTF8BOMInputStream does the same thing 506 // public static final class FilterBomInputStream extends InputStream { 507 // InputStream contents; 508 // boolean first = true; 509 // 510 // @Override 511 // public void close() throws IOException { 512 // contents.close(); 513 // } 514 // 515 // public FilterBomInputStream(InputStream fis) { 516 // contents = fis; 517 // } 518 // 519 // @Override 520 // public int read() throws IOException { 521 // int x = contents.read(); 522 // if (first) { 523 // first = false; 524 // // 0xEF,0xBB,0xBF 525 // // SKIP bom 526 // if (x == 0xEF) { 527 // int y = contents.read(); 528 // if (y == 0xBB) { 529 // int z = contents.read(); 530 // if (z == 0xBF) { 531 // x = contents.read(); 532 // } 533 // } 534 // } 535 // } 536 // return x; 537 // } 538 // } 539 loadPathValues(String filename, List<Pair<String, String>> data, boolean validating)540 public static List<Pair<String, String>> loadPathValues(String filename, List<Pair<String, String>> data, boolean validating) { 541 return loadPathValues(filename, data, validating, false); 542 } 543 loadPathValues(String filename, List<Pair<String, String>> data, boolean validating, boolean full)544 public static List<Pair<String, String>> loadPathValues(String filename, List<Pair<String, String>> data, boolean validating, boolean full) { 545 return loadPathValues(filename, data, validating, full, null); 546 } 547 loadPathValues(String filename, List<Pair<String, String>> data, boolean validating, boolean full, Function<String, String> valueFilter)548 public static List<Pair<String, String>> loadPathValues(String filename, List<Pair<String, String>> data, boolean validating, boolean full, 549 Function<String, String> valueFilter) { 550 try { 551 new XMLFileReader() 552 .setHandler(new PathValueListHandler(data, full, valueFilter)) 553 .read(filename, -1, validating); 554 return data; 555 } catch (Exception e) { 556 throw new ICUException(filename, e); 557 } 558 } 559 processPathValues(String filename, boolean validating, SimpleHandler simpleHandler)560 public static void processPathValues(String filename, boolean validating, SimpleHandler simpleHandler) { 561 try { 562 new XMLFileReader() 563 .setHandler(simpleHandler) 564 .read(filename, -1, validating); 565 } catch (Exception e) { 566 throw new ICUException(filename, e); 567 } 568 } 569 570 static final class PathValueListHandler extends SimpleHandler { 571 List<Pair<String, String>> data; 572 boolean full; 573 private Function<String, String> valueFilter; 574 PathValueListHandler(List<Pair<String, String>> data, boolean full, Function<String, String> valueFilter)575 public PathValueListHandler(List<Pair<String, String>> data, boolean full, Function<String, String> valueFilter) { 576 super(); 577 this.data = data != null ? data : new ArrayList<>(); 578 this.full = full; 579 this.valueFilter = valueFilter; 580 } 581 582 @Override handlePathValue(String path, String value)583 public void handlePathValue(String path, String value) { 584 if (valueFilter == null) { 585 data.add(Pair.of(path, value)); 586 } else { 587 String filteredValue = valueFilter.apply(value); 588 if (filteredValue != null) { 589 data.add(Pair.of(path, filteredValue)); 590 } 591 } 592 } 593 594 @Override handleComment(String path, String comment)595 public void handleComment(String path, String comment) { 596 if (!full || path.equals("/")) { 597 return; 598 } 599 data.add(Pair.of("!", comment)); 600 } 601 } 602 } 603