1 /* 2 ********************************************************************** 3 * Copyright (c) 2002-2004, International Business Machines 4 * Corporation and others. All Rights Reserved. 5 ********************************************************************** 6 * Author: Mark Davis 7 ********************************************************************** 8 */ 9 package org.unicode.cldr.util; 10 11 import java.io.FileInputStream; 12 import java.io.IOException; 13 import java.io.InputStream; 14 import java.io.Reader; 15 import java.util.ArrayList; 16 import java.util.List; 17 import java.util.Stack; 18 19 import org.xml.sax.Attributes; 20 import org.xml.sax.ContentHandler; 21 import org.xml.sax.ErrorHandler; 22 import org.xml.sax.InputSource; 23 import org.xml.sax.Locator; 24 import org.xml.sax.SAXException; 25 import org.xml.sax.SAXNotRecognizedException; 26 import org.xml.sax.SAXNotSupportedException; 27 import org.xml.sax.SAXParseException; 28 import org.xml.sax.XMLReader; 29 import org.xml.sax.ext.DeclHandler; 30 import org.xml.sax.ext.LexicalHandler; 31 import org.xml.sax.helpers.XMLReaderFactory; 32 33 import com.google.common.base.Function; 34 import com.ibm.icu.util.ICUException; 35 import com.ibm.icu.util.ICUUncheckedIOException; 36 37 /** 38 * Convenience class to make reading XML data files easier. The main method is read(); 39 * This is meant for XML data files, so the contents of elements must either be all other elements, or 40 * just text. It is thus not suitable for XML files with MIXED content; 41 * all text content in a mixed element is discarded. 42 * 43 * @author davis 44 */ 45 public class XMLFileReader { 46 static final boolean SHOW_ALL = false; 47 /** 48 * Handlers to use in read() 49 */ 50 public static int CONTENT_HANDLER = 1, ERROR_HANDLER = 2, LEXICAL_HANDLER = 4, DECLARATION_HANDLER = 8; 51 52 private MyContentHandler DEFAULT_DECLHANDLER = new MyContentHandler(); 53 // TODO Add way to skip gathering value contents 54 // private ElementOnlyContentHandler ELEMENT_ONLY_DECLHANDLER = new ElementOnlyContentHandler(); 55 private SimpleHandler simpleHandler; 56 57 public static class SimpleHandler { handlePathValue(String path, String value)58 public void handlePathValue(String path, String value) { 59 } 60 handleComment(String path, String comment)61 public void handleComment(String path, String comment) { 62 } 63 handleElementDecl(String name, String model)64 public void handleElementDecl(String name, String model) { 65 } 66 handleAttributeDecl(String eName, String aName, String type, String mode, String value)67 public void handleAttributeDecl(String eName, String aName, String type, String mode, String value) { 68 } 69 handleEndDtd()70 public void handleEndDtd() { 71 } 72 handleStartDtd(String name, String publicId, String systemId)73 public void handleStartDtd(String name, String publicId, String systemId) { 74 } 75 } 76 setHandler(SimpleHandler simpleHandler)77 public XMLFileReader setHandler(SimpleHandler simpleHandler) { 78 this.simpleHandler = simpleHandler; 79 return this; 80 } 81 82 /** 83 * Read an XML file. The order of the elements matches what was in the file. 84 * 85 * @param fileName 86 * file to open 87 * @param handlers 88 * a set of values for the handlers to use, eg CONTENT_HANDLER | ERROR_HANDLER 89 * @param validating 90 * if a validating parse is requested 91 * @return list of alternating values. 92 */ read(String fileName, int handlers, boolean validating)93 public XMLFileReader read(String fileName, int handlers, boolean validating) { 94 try (InputStream fis = new FileInputStream(fileName); 95 ) { 96 return read(fileName, new InputSource(fis), handlers, validating); 97 } catch (IOException e) { 98 throw (IllegalArgumentException) new IllegalArgumentException("Can't read " + fileName).initCause(e); 99 } 100 } 101 102 103 /** 104 * read from a CLDR resource 105 * @param fileName 106 * @param handlers 107 * @param validating 108 * @param fis 109 * @see CldrUtility#getInputStream(String) 110 * @return 111 */ readCLDRResource(String resName, int handlers, boolean validating)112 public XMLFileReader readCLDRResource(String resName, int handlers, boolean validating) { 113 try (InputStream inputStream = CldrUtility.getInputStream(resName)) { 114 return read(resName, new InputSource(inputStream), handlers, validating); 115 } catch (IOException e) { 116 throw new ICUUncheckedIOException(e); 117 } 118 } 119 120 /** 121 * read from an arbitrary 122 * @param fileName 123 * @param handlers 124 * @param validating 125 * @param fis 126 * @see CldrUtility#getInputStream(String) 127 * @return 128 */ read(String resName, Class<?> callingClass, int handlers, boolean validating)129 public XMLFileReader read(String resName, Class<?> callingClass, int handlers, boolean validating) { 130 try (InputStream inputStream = CldrUtility.getInputStream(callingClass, resName)) { 131 return read(resName, new InputSource(inputStream), handlers, validating); 132 } catch (IOException e) { 133 throw new ICUUncheckedIOException(e); 134 } 135 } 136 read(String systemID, Reader reader, int handlers, boolean validating)137 public XMLFileReader read(String systemID, Reader reader, int handlers, boolean validating) { 138 read(systemID, reader, handlers, validating, DEFAULT_DECLHANDLER.reset()); 139 return this; 140 } 141 read(String systemID, InputSource insrc, int handlers, boolean validating)142 public XMLFileReader read(String systemID, InputSource insrc, int handlers, boolean validating) { 143 read(systemID, insrc, handlers, validating, DEFAULT_DECLHANDLER.reset()); 144 return this; 145 } 146 read(String systemID, InputStream instr, int handlers, boolean validating, AllHandler allHandler)147 public static void read(String systemID, InputStream instr, int handlers, boolean validating, AllHandler allHandler) { 148 InputSource is = new InputSource(instr); 149 read(systemID, is, handlers, validating, allHandler); 150 } 151 read(String systemID, Reader reader, int handlers, boolean validating, AllHandler allHandler)152 public static void read(String systemID, Reader reader, int handlers, boolean validating, AllHandler allHandler) { 153 InputSource is = new InputSource(reader); 154 read(systemID, is, handlers, validating, allHandler); 155 } 156 read(String systemID, InputSource is, int handlers, boolean validating, AllHandler allHandler)157 public static void read(String systemID, InputSource is, int handlers, boolean validating, AllHandler allHandler) { 158 try { 159 XMLReader xmlReader = createXMLReader(handlers, validating, allHandler); 160 is.setSystemId(systemID); 161 try { 162 xmlReader.parse(is); 163 } catch (AbortException e) { 164 } // ok 165 } catch (SAXParseException e) { 166 throw (IllegalArgumentException) new IllegalArgumentException("Can't read " + systemID + "\tline:\t" 167 + e.getLineNumber()).initCause(e); 168 } catch (SAXException | IOException e) { 169 throw (IllegalArgumentException) new IllegalArgumentException("Can't read " + systemID).initCause(e); 170 } 171 } 172 createXMLReader(int handlers, boolean validating, AllHandler allHandler)173 private static final XMLReader createXMLReader(int handlers, boolean validating, AllHandler allHandler) throws SAXNotRecognizedException, SAXNotSupportedException { 174 XMLReader xmlReader = createXMLReader(validating); 175 if ((handlers & CONTENT_HANDLER) != 0) { 176 xmlReader.setContentHandler(allHandler); 177 } 178 if ((handlers & ERROR_HANDLER) != 0) { 179 xmlReader.setErrorHandler(allHandler); 180 } 181 if ((handlers & LEXICAL_HANDLER) != 0) { 182 xmlReader.setProperty("http://xml.org/sax/properties/lexical-handler", allHandler); 183 } 184 if ((handlers & DECLARATION_HANDLER) != 0) { 185 xmlReader.setProperty("http://xml.org/sax/properties/declaration-handler", allHandler); 186 } 187 return xmlReader; 188 } 189 190 public interface AllHandler extends ContentHandler, LexicalHandler, DeclHandler, ErrorHandler { 191 192 } 193 194 195 /** Basis for handlers that provides for logging, with no actions on methods 196 */ 197 static public class LoggingHandler implements AllHandler { 198 @Override startDocument()199 public void startDocument() throws SAXException { 200 if (SHOW_ALL) Log.logln("startDocument"); 201 } 202 203 @Override characters(char[] ch, int start, int length)204 public void characters(char[] ch, int start, int length) throws SAXException { 205 if (SHOW_ALL) Log.logln("characters"); 206 } 207 208 @Override startElement(String namespaceURI, String localName, String qName, Attributes atts)209 public void startElement(String namespaceURI, String localName, String qName, Attributes atts) 210 throws SAXException { 211 if (SHOW_ALL) Log.logln("startElement"); 212 } 213 214 @Override endElement(String namespaceURI, String localName, String qName)215 public void endElement(String namespaceURI, String localName, String qName) throws SAXException { 216 if (SHOW_ALL) Log.logln("endElement"); 217 } 218 219 @Override startDTD(String name, String publicId, String systemId)220 public void startDTD(String name, String publicId, String systemId) throws SAXException { 221 if (SHOW_ALL) Log.logln("startDTD"); 222 } 223 224 @Override endDTD()225 public void endDTD() throws SAXException { 226 if (SHOW_ALL) Log.logln("endDTD"); 227 } 228 229 @Override comment(char[] ch, int start, int length)230 public void comment(char[] ch, int start, int length) throws SAXException { 231 if (SHOW_ALL) Log.logln(" comment " + new String(ch, start, length)); 232 } 233 234 @Override elementDecl(String name, String model)235 public void elementDecl(String name, String model) throws SAXException { 236 if (SHOW_ALL) Log.logln("elementDecl"); 237 } 238 239 @Override attributeDecl(String eName, String aName, String type, String mode, String value)240 public void attributeDecl(String eName, String aName, String type, String mode, String value) 241 throws SAXException { 242 if (SHOW_ALL) Log.logln("attributeDecl"); 243 } 244 245 @Override ignorableWhitespace(char[] ch, int start, int length)246 public void ignorableWhitespace(char[] ch, int start, int length) throws SAXException { 247 if (SHOW_ALL) Log.logln("ignorableWhitespace length: " + length); 248 } 249 250 @Override endDocument()251 public void endDocument() throws SAXException { 252 if (SHOW_ALL) Log.logln("endDocument"); 253 } 254 255 @Override internalEntityDecl(String name, String value)256 public void internalEntityDecl(String name, String value) throws SAXException { 257 if (SHOW_ALL) Log.logln("Internal Entity\t" + name + "\t" + value); 258 } 259 260 @Override externalEntityDecl(String name, String publicId, String systemId)261 public void externalEntityDecl(String name, String publicId, String systemId) throws SAXException { 262 if (SHOW_ALL) Log.logln("Internal Entity\t" + name + "\t" + publicId + "\t" + systemId); 263 } 264 notationDecl(String name, String publicId, String systemId)265 public void notationDecl(String name, String publicId, String systemId) { 266 if (SHOW_ALL) Log.logln("notationDecl: " + name 267 + ", " + publicId 268 + ", " + systemId); 269 } 270 271 @Override processingInstruction(String target, String data)272 public void processingInstruction(String target, String data) 273 throws SAXException { 274 if (SHOW_ALL) Log.logln("processingInstruction: " + target + ", " + data); 275 } 276 277 @Override skippedEntity(String name)278 public void skippedEntity(String name) 279 throws SAXException { 280 if (SHOW_ALL) Log.logln("skippedEntity: " + name); 281 } 282 unparsedEntityDecl(String name, String publicId, String systemId, String notationName)283 public void unparsedEntityDecl(String name, String publicId, 284 String systemId, String notationName) { 285 if (SHOW_ALL) Log.logln("unparsedEntityDecl: " + name 286 + ", " + publicId 287 + ", " + systemId 288 + ", " + notationName); 289 } 290 291 @Override setDocumentLocator(Locator locator)292 public void setDocumentLocator(Locator locator) { 293 if (SHOW_ALL) Log.logln("setDocumentLocator Locator " + locator); 294 } 295 296 @Override startPrefixMapping(String prefix, String uri)297 public void startPrefixMapping(String prefix, String uri) throws SAXException { 298 if (SHOW_ALL) Log.logln("startPrefixMapping prefix: " + prefix + 299 ", uri: " + uri); 300 } 301 302 @Override endPrefixMapping(String prefix)303 public void endPrefixMapping(String prefix) throws SAXException { 304 if (SHOW_ALL) Log.logln("endPrefixMapping prefix: " + prefix); 305 } 306 307 @Override startEntity(String name)308 public void startEntity(String name) throws SAXException { 309 if (SHOW_ALL) Log.logln("startEntity name: " + name); 310 } 311 312 @Override endEntity(String name)313 public void endEntity(String name) throws SAXException { 314 if (SHOW_ALL) Log.logln("endEntity name: " + name); 315 } 316 317 @Override startCDATA()318 public void startCDATA() throws SAXException { 319 if (SHOW_ALL) Log.logln("startCDATA"); 320 } 321 322 @Override endCDATA()323 public void endCDATA() throws SAXException { 324 if (SHOW_ALL) Log.logln("endCDATA"); 325 } 326 327 /* 328 * (non-Javadoc) 329 * 330 * @see org.xml.sax.ErrorHandler#error(org.xml.sax.SAXParseException) 331 */ 332 @Override error(SAXParseException exception)333 public void error(SAXParseException exception) throws SAXException { 334 if (SHOW_ALL) Log.logln("error: " + showSAX(exception)); 335 throw exception; 336 } 337 338 /* 339 * (non-Javadoc) 340 * 341 * @see org.xml.sax.ErrorHandler#fatalError(org.xml.sax.SAXParseException) 342 */ 343 @Override fatalError(SAXParseException exception)344 public void fatalError(SAXParseException exception) throws SAXException { 345 if (SHOW_ALL) Log.logln("fatalError: " + showSAX(exception)); 346 throw exception; 347 } 348 349 /* 350 * (non-Javadoc) 351 * 352 * @see org.xml.sax.ErrorHandler#warning(org.xml.sax.SAXParseException) 353 */ 354 @Override warning(SAXParseException exception)355 public void warning(SAXParseException exception) throws SAXException { 356 if (SHOW_ALL) Log.logln("warning: " + showSAX(exception)); 357 throw exception; 358 } 359 360 } 361 362 public class MyContentHandler extends LoggingHandler { 363 StringBuffer chars = new StringBuffer(); 364 StringBuffer commentChars = new StringBuffer(); 365 Stack<String> startElements = new Stack<>(); 366 StringBuffer tempPath = new StringBuffer(); 367 boolean lastIsStart = false; 368 reset()369 public MyContentHandler reset() { 370 chars.setLength(0); 371 tempPath = new StringBuffer("/"); 372 startElements.clear(); 373 startElements.push("/"); 374 return this; 375 } 376 377 @Override characters(char[] ch, int start, int length)378 public void characters(char[] ch, int start, int length) throws SAXException { 379 if (lastIsStart) chars.append(ch, start, length); 380 } 381 382 @Override startElement(String namespaceURI, String localName, String qName, Attributes atts)383 public void startElement(String namespaceURI, String localName, String qName, Attributes atts) 384 throws SAXException { 385 tempPath.setLength(0); 386 tempPath.append(startElements.peek()).append('/').append(qName); 387 for (int i = 0; i < atts.getLength(); ++i) { 388 tempPath.append("[@").append(atts.getQName(i)).append("=\"").append(atts.getValue(i).replace('"', '\'')).append("\"]"); 389 } 390 startElements.push(tempPath.toString()); 391 chars.setLength(0); // clear garbage 392 lastIsStart = true; 393 } 394 395 @Override endElement(String namespaceURI, String localName, String qName)396 public void endElement(String namespaceURI, String localName, String qName) throws SAXException { 397 String startElement = startElements.pop(); 398 if (lastIsStart) { 399 // System.out.println(startElement + ":" + chars); 400 simpleHandler.handlePathValue(startElement, chars.toString()); 401 } 402 chars.setLength(0); 403 lastIsStart = false; 404 } 405 406 @Override startDTD(String name, String publicId, String systemId)407 public void startDTD(String name, String publicId, String systemId) throws SAXException { 408 if (SHOW_ALL) Log.logln("startDTD name: " + name 409 + ", publicId: " + publicId 410 + ", systemId: " + systemId); 411 simpleHandler.handleStartDtd(name, publicId, systemId); 412 } 413 414 @Override endDTD()415 public void endDTD() throws SAXException { 416 if (SHOW_ALL) Log.logln("endDTD"); 417 simpleHandler.handleEndDtd(); 418 } 419 420 @Override comment(char[] ch, int start, int length)421 public void comment(char[] ch, int start, int length) throws SAXException { 422 if (SHOW_ALL) Log.logln(" comment " + new String(ch, start, length)); 423 commentChars.append(ch, start, length); 424 simpleHandler.handleComment(startElements.peek(), commentChars.toString()); 425 commentChars.setLength(0); 426 } 427 428 @Override elementDecl(String name, String model)429 public void elementDecl(String name, String model) throws SAXException { 430 simpleHandler.handleElementDecl(name, model); 431 } 432 433 @Override attributeDecl(String eName, String aName, String type, String mode, String value)434 public void attributeDecl(String eName, String aName, String type, String mode, String value) 435 throws SAXException { 436 simpleHandler.handleAttributeDecl(eName, aName, type, mode, value); 437 } 438 439 } 440 441 static final class AbortException extends RuntimeException { 442 private static final long serialVersionUID = 1L; 443 } 444 445 /** 446 * Show a SAX exception in a readable form. 447 */ showSAX(SAXParseException exception)448 public static String showSAX(SAXParseException exception) { 449 return exception.getMessage() 450 + ";\t SystemID: " + exception.getSystemId() 451 + ";\t PublicID: " + exception.getPublicId() 452 + ";\t LineNumber: " + exception.getLineNumber() 453 + ";\t ColumnNumber: " + exception.getColumnNumber(); 454 } 455 createXMLReader(boolean validating)456 public static XMLReader createXMLReader(boolean validating) { 457 // weiv 07/20/2007: The laundry list below is somewhat obsolete 458 // I have moved the system's default parser (instantiated when "" is 459 // passed) to the top, so that we will always use that. I have also 460 // removed "org.apache.crimson.parser.XMLReaderImpl" as this one gets 461 // confused regarding UTF-8 encoding name. 462 String[] testList = { 463 System.getProperty("CLDR_DEFAULT_SAX_PARSER", ""), // defaults to "", system default. 464 "org.apache.xerces.parsers.SAXParser", 465 "gnu.xml.aelfred2.XmlReader", 466 "com.bluecast.xml.Piccolo", 467 "oracle.xml.parser.v2.SAXParser" 468 }; 469 XMLReader result = null; 470 for (int i = 0; i < testList.length; ++i) { 471 try { 472 result = (testList[i].length() != 0) 473 ? XMLReaderFactory.createXMLReader(testList[i]) 474 : XMLReaderFactory.createXMLReader(); 475 result.setFeature("http://xml.org/sax/features/validation", validating); 476 break; 477 } catch (SAXException e1) { 478 } 479 } 480 if (result == null) 481 throw new NoClassDefFoundError("No SAX parser is available, or unable to set validation correctly"); 482 return result; 483 } 484 485 static final class DebuggingInputStream extends InputStream { 486 InputStream contents; 487 488 @Override close()489 public void close() throws IOException { 490 contents.close(); 491 } 492 DebuggingInputStream(InputStream fis)493 public DebuggingInputStream(InputStream fis) { 494 contents = fis; 495 } 496 497 @Override read()498 public int read() throws IOException { 499 int x = contents.read(); 500 System.out.println(Integer.toHexString(x) + ","); 501 return x; 502 } 503 } 504 loadPathValues(String filename, List<Pair<String, String>> data, boolean validating)505 public static List<Pair<String, String>> loadPathValues(String filename, List<Pair<String, String>> data, boolean validating) { 506 return loadPathValues(filename, data, validating, false); 507 } 508 loadPathValues(String filename, List<Pair<String, String>> data, boolean validating, boolean full)509 public static List<Pair<String, String>> loadPathValues(String filename, List<Pair<String, String>> data, boolean validating, boolean full) { 510 return loadPathValues(filename, data, validating, full, null); 511 } 512 loadPathValues(String filename, List<Pair<String, String>> data, boolean validating, boolean full, Function<String, String> valueFilter)513 public static List<Pair<String, String>> loadPathValues(String filename, List<Pair<String, String>> data, boolean validating, boolean full, 514 Function<String, String> valueFilter) { 515 try { 516 new XMLFileReader() 517 .setHandler(new PathValueListHandler(data, full, valueFilter)) 518 .read(filename, -1, validating); 519 return data; 520 } catch (Exception e) { 521 throw new ICUException(filename, e); 522 } 523 } 524 processPathValues(String filename, boolean validating, SimpleHandler simpleHandler)525 public static void processPathValues(String filename, boolean validating, SimpleHandler simpleHandler) { 526 try { 527 new XMLFileReader() 528 .setHandler(simpleHandler) 529 .read(filename, -1, validating); 530 } catch (Exception e) { 531 throw new ICUException(filename, e); 532 } 533 } 534 535 static final class PathValueListHandler extends SimpleHandler { 536 List<Pair<String, String>> data; 537 boolean full; 538 private Function<String, String> valueFilter; 539 PathValueListHandler(List<Pair<String, String>> data, boolean full, Function<String, String> valueFilter)540 public PathValueListHandler(List<Pair<String, String>> data, boolean full, Function<String, String> valueFilter) { 541 super(); 542 this.data = data != null ? data : new ArrayList<>(); 543 this.full = full; 544 this.valueFilter = valueFilter; 545 } 546 547 @Override handlePathValue(String path, String value)548 public void handlePathValue(String path, String value) { 549 if (valueFilter == null) { 550 data.add(Pair.of(path, value)); 551 } else { 552 String filteredValue = valueFilter.apply(value); 553 if (filteredValue != null) { 554 data.add(Pair.of(path, filteredValue)); 555 } 556 } 557 } 558 559 @Override handleComment(String path, String comment)560 public void handleComment(String path, String comment) { 561 if (!full || path.equals("/")) { 562 return; 563 } 564 data.add(Pair.of("!", comment)); 565 } 566 } 567 } 568