/*
 **********************************************************************
 * Copyright (c) 2002-2004, International Business Machines
 * Corporation and others. All Rights Reserved.
 **********************************************************************
 * Author: Mark Davis
 **********************************************************************
 */
package org.unicode.cldr.util;

import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.Reader;
import java.util.ArrayList;
import java.util.List;
import java.util.Stack;

import org.xml.sax.Attributes;
import org.xml.sax.ContentHandler;
import org.xml.sax.ErrorHandler;
import org.xml.sax.InputSource;
import org.xml.sax.Locator;
import org.xml.sax.SAXException;
import org.xml.sax.SAXNotRecognizedException;
import org.xml.sax.SAXNotSupportedException;
import org.xml.sax.SAXParseException;
import org.xml.sax.XMLReader;
import org.xml.sax.ext.DeclHandler;
import org.xml.sax.ext.LexicalHandler;
import org.xml.sax.helpers.XMLReaderFactory;

import com.google.common.base.Function;
import com.ibm.icu.util.ICUException;
import com.ibm.icu.util.ICUUncheckedIOException;

/**
 * Convenience class to make reading XML data files easier. The main method is read();
 * This is meant for XML data files, so the contents of elements must either be all other elements, or
 * just text. It is thus not suitable for XML files with MIXED content;
 * all text content in a mixed element is discarded.
 *
 * <p>Typical use: install a {@link SimpleHandler} with {@link #setHandler(SimpleHandler)} and then call one
 * of the instance {@code read} methods. The static {@code read} overloads drive an arbitrary
 * {@link AllHandler} instead.
 *
 * @author davis
 */
public class XMLFileReader {
    static final boolean SHOW_ALL = false;

    /**
     * Handlers to use in read(). The values are bit flags and may be OR-ed together.
     */
    public static int CONTENT_HANDLER = 1, ERROR_HANDLER = 2, LEXICAL_HANDLER = 4, DECLARATION_HANDLER = 8;

    private final MyContentHandler DEFAULT_DECLHANDLER = new MyContentHandler();
    // TODO Add way to skip gathering value contents
    // private ElementOnlyContentHandler ELEMENT_ONLY_DECLHANDLER = new ElementOnlyContentHandler();

    // Defaults to a no-op handler so that parsing before setHandler() is called does not fail with a
    // NullPointerException inside the SAX callbacks.
    private SimpleHandler simpleHandler = new SimpleHandler();

    /**
     * Callback sink for the simplified events produced by {@link MyContentHandler}.
     * Every method is a no-op; subclasses override the ones they care about.
     */
    public static class SimpleHandler {
        /**
         * Called for each element that closes without having contained a child element.
         *
         * @param path
         *            the path of the element, including its attributes
         * @param value
         *            the accumulated text content of the element
         */
        public void handlePathValue(String path, String value) {
        }

        /**
         * Called for each comment reported by the parser.
         *
         * @param path
         *            the path of the innermost element that is open when the comment is seen
         * @param comment
         *            the comment text
         */
        public void handleComment(String path, String comment) {
        }

        /** Called for each element declaration in the DTD. */
        public void handleElementDecl(String name, String model) {
        }

        /** Called for each attribute declaration in the DTD. */
        public void handleAttributeDecl(String eName, String aName, String type, String mode, String value) {
        }

        /** Called when the end of the DTD is reported. */
        public void handleEndDtd() {
        }

        /** Called when the start of the DTD is reported. */
        public void handleStartDtd(String name, String publicId, String systemId) {
        }
    }

    /**
     * Sets the handler that receives the simplified events from the instance {@code read} methods.
     *
     * @param simpleHandler
     *            the handler to use; {@code null} installs a handler that ignores every event
     * @return this reader, for chaining
     */
    public XMLFileReader setHandler(SimpleHandler simpleHandler) {
        this.simpleHandler = simpleHandler != null ? simpleHandler : new SimpleHandler();
        return this;
    }

    /**
     * Read an XML file. The order of the elements matches what was in the file.
     *
     * @param fileName
     *            file to open
     * @param handlers
     *            a set of values for the handlers to use, eg CONTENT_HANDLER | ERROR_HANDLER
     * @param validating
     *            if a validating parse is requested
     * @return this reader, for chaining
     * @throws IllegalArgumentException
     *             if the file cannot be opened, read or parsed
     */
    public XMLFileReader read(String fileName, int handlers, boolean validating) {
        try (InputStream fis = new FileInputStream(fileName)) {
            return read(fileName, new InputSource(fis), handlers, validating);
        } catch (IOException e) {
            String fullName = fileName;
            try {
                fullName = new File(fileName).getCanonicalPath();
            } catch (IOException | SecurityException ignored) {
                // Best effort only: the canonical path just makes the message nicer,
                // so fall back to the name exactly as it was given.
            }
            throw new IllegalArgumentException("Can't read " + fullName, e);
        }
    }

    /**
     * Read from a CLDR resource, opened with {@link CldrUtility#getInputStream(String)}.
     *
     * @param resName
     *            name of the resource; also used as the system ID of the input
     * @param handlers
     *            a set of values for the handlers to use, eg CONTENT_HANDLER | ERROR_HANDLER
     * @param validating
     *            if a validating parse is requested
     * @see CldrUtility#getInputStream(String)
     * @return this reader, for chaining
     * @throws ICUUncheckedIOException
     *             if an IOException occurs while opening or closing the resource
     */
    public XMLFileReader readCLDRResource(String resName, int handlers, boolean validating) {
        try (InputStream inputStream = CldrUtility.getInputStream(resName)) {
            return read(resName, new InputSource(inputStream), handlers, validating);
        } catch (IOException e) {
            throw new ICUUncheckedIOException(e);
        }
    }

    /**
     * Read from a resource opened with {@code CldrUtility.getInputStream(callingClass, resName)}.
     *
     * @param resName
     *            name of the resource; also used as the system ID of the input
     * @param callingClass
     *            class passed to CldrUtility to locate the resource
     * @param handlers
     *            a set of values for the handlers to use, eg CONTENT_HANDLER | ERROR_HANDLER
     * @param validating
     *            if a validating parse is requested
     * @return this reader, for chaining
     * @throws ICUUncheckedIOException
     *             if an IOException occurs while opening or closing the resource
     */
    public XMLFileReader read(String resName, Class<?> callingClass, int handlers, boolean validating) {
        try (InputStream inputStream = CldrUtility.getInputStream(callingClass, resName)) {
            return read(resName, new InputSource(inputStream), handlers, validating);
        } catch (IOException e) {
            throw new ICUUncheckedIOException(e);
        }
    }

    /**
     * Parse characters from a Reader, reporting to the installed {@link SimpleHandler}.
     *
     * @param systemID
     *            system ID to set on the input
     * @param reader
     *            source of the XML text
     * @param handlers
     *            a set of values for the handlers to use, eg CONTENT_HANDLER | ERROR_HANDLER
     * @param validating
     *            if a validating parse is requested
     * @return this reader, for chaining
     */
    public XMLFileReader read(String systemID, Reader reader, int handlers, boolean validating) {
        read(systemID, reader, handlers, validating, DEFAULT_DECLHANDLER.reset());
        return this;
    }

    /**
     * Parse an InputSource, reporting to the installed {@link SimpleHandler}.
     *
     * @param systemID
     *            system ID to set on the input
     * @param insrc
     *            source of the XML
     * @param handlers
     *            a set of values for the handlers to use, eg CONTENT_HANDLER | ERROR_HANDLER
     * @param validating
     *            if a validating parse is requested
     * @return this reader, for chaining
     */
    public XMLFileReader read(String systemID, InputSource insrc, int handlers, boolean validating) {
        read(systemID, insrc, handlers, validating, DEFAULT_DECLHANDLER.reset());
        return this;
    }

    /**
     * Parse an InputStream with a caller-supplied handler.
     *
     * @see #read(String, InputSource, int, boolean, AllHandler)
     */
    public static void read(String systemID, InputStream instr, int handlers, boolean validating, AllHandler allHandler) {
        read(systemID, new InputSource(instr), handlers, validating, allHandler);
    }

    /**
     * Parse a Reader with a caller-supplied handler.
     *
     * @see #read(String, InputSource, int, boolean, AllHandler)
     */
    public static void read(String systemID, Reader reader, int handlers, boolean validating, AllHandler allHandler) {
        read(systemID, new InputSource(reader), handlers, validating, allHandler);
    }

    /**
     * Parse an InputSource with a caller-supplied handler.
     *
     * @param systemID
     *            system ID to set on the input; also used in error messages
     * @param is
     *            source of the XML
     * @param handlers
     *            bit set selecting the roles in which allHandler is registered
     * @param validating
     *            if a validating parse is requested
     * @param allHandler
     *            receiver of the SAX events
     * @throws IllegalArgumentException
     *             if the parser reports a SAXException or IOException; the original exception is the cause
     */
    public static void read(String systemID, InputSource is, int handlers, boolean validating, AllHandler allHandler) {
        try {
            XMLReader xmlReader = createXMLReader(handlers, validating, allHandler);
            is.setSystemId(systemID);
            try {
                xmlReader.parse(is);
            } catch (AbortException ignored) {
                // ok: an AbortException ends the parse early without being treated as an error
            }
        } catch (SAXParseException e) {
            throw new IllegalArgumentException("Can't read " + systemID + "\tline:\t" + e.getLineNumber(), e);
        } catch (SAXException | IOException e) {
            throw new IllegalArgumentException("Can't read " + systemID, e);
        }
    }

    /**
     * Creates a reader and registers allHandler for each role whose flag is set in handlers.
     */
    private static final XMLReader createXMLReader(int handlers, boolean validating, AllHandler allHandler)
        throws SAXNotRecognizedException, SAXNotSupportedException {
        XMLReader xmlReader = createXMLReader(validating);
        if ((handlers & CONTENT_HANDLER) != 0) {
            xmlReader.setContentHandler(allHandler);
        }
        if ((handlers & ERROR_HANDLER) != 0) {
            xmlReader.setErrorHandler(allHandler);
        }
        if ((handlers & LEXICAL_HANDLER) != 0) {
            xmlReader.setProperty("http://xml.org/sax/properties/lexical-handler", allHandler);
        }
        if ((handlers & DECLARATION_HANDLER) != 0) {
            xmlReader.setProperty("http://xml.org/sax/properties/declaration-handler", allHandler);
        }
        return xmlReader;
    }

    /** Union of the SAX handler interfaces that a reader created by this class can drive. */
    public interface AllHandler extends ContentHandler, LexicalHandler, DeclHandler, ErrorHandler {

    }

    /**
     * Basis for handlers that provides for logging, with no actions on methods.
     * Logging only happens when SHOW_ALL is true. The three ErrorHandler methods
     * (including warning) rethrow the exception they are given.
     */
    public static class LoggingHandler implements AllHandler {
        @Override
        public void startDocument() throws SAXException {
            if (SHOW_ALL) {
                Log.logln("startDocument");
            }
        }

        @Override
        public void characters(char[] ch, int start, int length) throws SAXException {
            if (SHOW_ALL) {
                Log.logln("characters");
            }
        }

        @Override
        public void startElement(String namespaceURI, String localName, String qName, Attributes atts)
            throws SAXException {
            if (SHOW_ALL) {
                Log.logln("startElement");
            }
        }

        @Override
        public void endElement(String namespaceURI, String localName, String qName) throws SAXException {
            if (SHOW_ALL) {
                Log.logln("endElement");
            }
        }

        @Override
        public void startDTD(String name, String publicId, String systemId) throws SAXException {
            if (SHOW_ALL) {
                Log.logln("startDTD");
            }
        }

        @Override
        public void endDTD() throws SAXException {
            if (SHOW_ALL) {
                Log.logln("endDTD");
            }
        }

        @Override
        public void comment(char[] ch, int start, int length) throws SAXException {
            if (SHOW_ALL) {
                Log.logln(" comment " + new String(ch, start, length));
            }
        }

        @Override
        public void elementDecl(String name, String model) throws SAXException {
            if (SHOW_ALL) {
                Log.logln("elementDecl");
            }
        }

        @Override
        public void attributeDecl(String eName, String aName, String type, String mode, String value)
            throws SAXException {
            if (SHOW_ALL) {
                Log.logln("attributeDecl");
            }
        }

        @Override
        public void ignorableWhitespace(char[] ch, int start, int length) throws SAXException {
            if (SHOW_ALL) {
                Log.logln("ignorableWhitespace length: " + length);
            }
        }

        @Override
        public void endDocument() throws SAXException {
            if (SHOW_ALL) {
                Log.logln("endDocument");
            }
        }

        @Override
        public void internalEntityDecl(String name, String value) throws SAXException {
            if (SHOW_ALL) {
                Log.logln("Internal Entity\t" + name + "\t" + value);
            }
        }

        @Override
        public void externalEntityDecl(String name, String publicId, String systemId) throws SAXException {
            if (SHOW_ALL) {
                // Label corrected: this callback reports external, not internal, entities.
                Log.logln("External Entity\t" + name + "\t" + publicId + "\t" + systemId);
            }
        }

        // Not part of AllHandler (hence no @Override); kept for subclasses and callers that use it directly.
        public void notationDecl(String name, String publicId, String systemId) {
            if (SHOW_ALL) {
                Log.logln("notationDecl: " + name
                    + ", " + publicId
                    + ", " + systemId);
            }
        }

        @Override
        public void processingInstruction(String target, String data)
            throws SAXException {
            if (SHOW_ALL) {
                Log.logln("processingInstruction: " + target + ", " + data);
            }
        }

        @Override
        public void skippedEntity(String name)
            throws SAXException {
            if (SHOW_ALL) {
                Log.logln("skippedEntity: " + name);
            }
        }

        // Not part of AllHandler (hence no @Override); kept for subclasses and callers that use it directly.
        public void unparsedEntityDecl(String name, String publicId,
            String systemId, String notationName) {
            if (SHOW_ALL) {
                Log.logln("unparsedEntityDecl: " + name
                    + ", " + publicId
                    + ", " + systemId
                    + ", " + notationName);
            }
        }

        @Override
        public void setDocumentLocator(Locator locator) {
            if (SHOW_ALL) {
                Log.logln("setDocumentLocator Locator " + locator);
            }
        }

        @Override
        public void startPrefixMapping(String prefix, String uri) throws SAXException {
            if (SHOW_ALL) {
                Log.logln("startPrefixMapping prefix: " + prefix +
                    ", uri: " + uri);
            }
        }

        @Override
        public void endPrefixMapping(String prefix) throws SAXException {
            if (SHOW_ALL) {
                Log.logln("endPrefixMapping prefix: " + prefix);
            }
        }

        @Override
        public void startEntity(String name) throws SAXException {
            if (SHOW_ALL) {
                Log.logln("startEntity name: " + name);
            }
        }

        @Override
        public void endEntity(String name) throws SAXException {
            if (SHOW_ALL) {
                Log.logln("endEntity name: " + name);
            }
        }

        @Override
        public void startCDATA() throws SAXException {
            if (SHOW_ALL) {
                Log.logln("startCDATA");
            }
        }

        @Override
        public void endCDATA() throws SAXException {
            if (SHOW_ALL) {
                Log.logln("endCDATA");
            }
        }

        /*
         * (non-Javadoc)
         *
         * @see org.xml.sax.ErrorHandler#error(org.xml.sax.SAXParseException)
         */
        @Override
        public void error(SAXParseException exception) throws SAXException {
            if (SHOW_ALL) {
                Log.logln("error: " + showSAX(exception));
            }
            throw exception;
        }

        /*
         * (non-Javadoc)
         *
         * @see org.xml.sax.ErrorHandler#fatalError(org.xml.sax.SAXParseException)
         */
        @Override
        public void fatalError(SAXParseException exception) throws SAXException {
            if (SHOW_ALL) {
                Log.logln("fatalError: " + showSAX(exception));
            }
            throw exception;
        }

        /*
         * (non-Javadoc)
         *
         * @see org.xml.sax.ErrorHandler#warning(org.xml.sax.SAXParseException)
         */
        @Override
        public void warning(SAXParseException exception) throws SAXException {
            if (SHOW_ALL) {
                Log.logln("warning: " + showSAX(exception));
            }
            throw exception;
        }

    }

    /**
     * Translates SAX events into the simplified callbacks of the enclosing reader's {@link SimpleHandler}.
     *
     * <p>Each element's path is its parent's path, a '/', its qualified name, and one
     * {@code [@name="value"]} clause per attribute (double quotes inside a value become single quotes).
     * Because the stack is seeded with "/", the path of the root element starts with "//".
     * A value is reported only for an element that ends without having contained a child element;
     * text in any other position is discarded.
     */
    public class MyContentHandler extends LoggingHandler {
        StringBuffer chars = new StringBuffer();
        StringBuffer commentChars = new StringBuffer();
        Stack<String> startElements = new Stack<>();
        StringBuffer tempPath = new StringBuffer();
        // True from a start tag until the next end tag, i.e. while we might be inside a leaf element.
        boolean lastIsStart = false;

        /**
         * Prepares this handler for a new document. Must be called before each parse.
         *
         * @return this handler
         */
        public MyContentHandler reset() {
            chars.setLength(0);
            // Also drop state that a previous aborted or failed parse may have left behind.
            commentChars.setLength(0);
            lastIsStart = false;
            tempPath = new StringBuffer("/");
            startElements.clear();
            startElements.push("/");
            return this;
        }

        @Override
        public void characters(char[] ch, int start, int length) throws SAXException {
            // Text that follows a child's end tag belongs to mixed content and is dropped.
            if (lastIsStart) {
                chars.append(ch, start, length);
            }
        }

        @Override
        public void startElement(String namespaceURI, String localName, String qName, Attributes atts)
            throws SAXException {
            tempPath.setLength(0);
            tempPath.append(startElements.peek()).append('/').append(qName);
            for (int i = 0; i < atts.getLength(); ++i) {
                tempPath.append("[@").append(atts.getQName(i)).append("=\"")
                    .append(atts.getValue(i).replace('"', '\'')).append("\"]");
            }
            startElements.push(tempPath.toString());
            chars.setLength(0); // clear garbage
            lastIsStart = true;
        }

        @Override
        public void endElement(String namespaceURI, String localName, String qName) throws SAXException {
            String startElement = startElements.pop();
            if (lastIsStart) {
                // No child element intervened since the start tag, so this is a leaf.
                simpleHandler.handlePathValue(startElement, chars.toString());
            }
            chars.setLength(0);
            lastIsStart = false;
        }

        @Override
        public void startDTD(String name, String publicId, String systemId) throws SAXException {
            if (SHOW_ALL) {
                Log.logln("startDTD name: " + name
                    + ", publicId: " + publicId
                    + ", systemId: " + systemId);
            }
            simpleHandler.handleStartDtd(name, publicId, systemId);
        }

        @Override
        public void endDTD() throws SAXException {
            if (SHOW_ALL) {
                Log.logln("endDTD");
            }
            simpleHandler.handleEndDtd();
        }

        @Override
        public void comment(char[] ch, int start, int length) throws SAXException {
            if (SHOW_ALL) {
                Log.logln(" comment " + new String(ch, start, length));
            }
            commentChars.append(ch, start, length);
            simpleHandler.handleComment(startElements.peek(), commentChars.toString());
            commentChars.setLength(0);
        }

        @Override
        public void elementDecl(String name, String model) throws SAXException {
            simpleHandler.handleElementDecl(name, model);
        }

        @Override
        public void attributeDecl(String eName, String aName, String type, String mode, String value)
            throws SAXException {
            simpleHandler.handleAttributeDecl(eName, aName, type, mode, value);
        }

    }

    /**
     * Unchecked exception that the static read methods treat as a normal, early end of parsing.
     * Nothing in this file throws it; presumably handlers throw it to stop a parse — confirm against callers.
     */
    static final class AbortException extends RuntimeException {
        private static final long serialVersionUID = 1L;
    }

    /**
     * Show a SAX exception in a readable form.
     *
     * @param exception
     *            the exception to describe
     * @return the message followed by the system ID, public ID, line number and column number
     */
    public static String showSAX(SAXParseException exception) {
        return exception.getMessage()
            + ";\t SystemID: " + exception.getSystemId()
            + ";\t PublicID: " + exception.getPublicId()
            + ";\t LineNumber: " + exception.getLineNumber()
            + ";\t ColumnNumber: " + exception.getColumnNumber();
    }

    /**
     * Creates an XMLReader with the validation feature set as requested.
     *
     * <p>Candidates are tried in order: the class named by the system property
     * {@code CLDR_DEFAULT_SAX_PARSER} (or the platform default when that is unset or empty), then a
     * fixed list of known parsers. The first candidate that can be both instantiated and configured wins.
     *
     * @param validating
     *            value for the {@code http://xml.org/sax/features/validation} feature
     * @return a configured reader, never null
     * @throws NoClassDefFoundError
     *             if no candidate can be created and configured
     */
    public static XMLReader createXMLReader(boolean validating) {
        // weiv 07/20/2007: The laundry list below is somewhat obsolete
        // I have moved the system's default parser (instantiated when "" is
        // passed) to the top, so that we will always use that. I have also
        // removed "org.apache.crimson.parser.XMLReaderImpl" as this one gets
        // confused regarding UTF-8 encoding name.
        String[] testList = {
            System.getProperty("CLDR_DEFAULT_SAX_PARSER", ""), // defaults to "", system default.
            "org.apache.xerces.parsers.SAXParser",
            "gnu.xml.aelfred2.XmlReader",
            "com.bluecast.xml.Piccolo",
            "oracle.xml.parser.v2.SAXParser"
        };
        for (String parserClass : testList) {
            try {
                XMLReader candidate = parserClass.isEmpty()
                    ? XMLReaderFactory.createXMLReader()
                    : XMLReaderFactory.createXMLReader(parserClass);
                candidate.setFeature("http://xml.org/sax/features/validation", validating);
                // Only hand back a reader whose validation setting was actually accepted.
                return candidate;
            } catch (SAXException ignored) {
                // This candidate is missing or cannot be configured; try the next one.
            }
        }
        throw new NoClassDefFoundError("No SAX parser is available, or unable to set validation correctly");
    }

    /** Debugging aid: an InputStream wrapper that prints every byte it reads, in hex, to System.out. */
    static final class DebuggingInputStream extends InputStream {
        InputStream contents;

        @Override
        public void close() throws IOException {
            contents.close();
        }

        public DebuggingInputStream(InputStream fis) {
            contents = fis;
        }

        @Override
        public int read() throws IOException {
            int x = contents.read();
            System.out.println(Integer.toHexString(x) + ",");
            return x;
        }
    }

    /**
     * Same as {@link #loadPathValues(String, List, boolean, boolean)} with {@code full} set to false.
     */
    public static List<Pair<String, String>> loadPathValues(String filename, List<Pair<String, String>> data, boolean validating) {
        return loadPathValues(filename, data, validating, false);
    }

    /**
     * Same as {@link #loadPathValues(String, List, boolean, boolean, Function)} with no value filter.
     */
    public static List<Pair<String, String>> loadPathValues(String filename, List<Pair<String, String>> data, boolean validating, boolean full) {
        return loadPathValues(filename, data, validating, full, null);
    }

    /**
     * Reads a file and appends one (path, value) pair per leaf element to a list.
     *
     * @param filename
     *            file to read
     * @param data
     *            list to append to; if null, a new list is created
     * @param validating
     *            if a validating parse is requested
     * @param full
     *            if true, comments are also added, as pairs whose first member is "!"
     * @param valueFilter
     *            if non-null, applied to each value; pairs whose filtered value is null are skipped
     * @return the list that was appended to (never null)
     * @throws ICUException
     *             wrapping any exception thrown while reading
     */
    public static List<Pair<String, String>> loadPathValues(String filename, List<Pair<String, String>> data, boolean validating, boolean full,
        Function<String, String> valueFilter) {
        // Resolve the target list here so that the list actually filled is the one returned,
        // even when the caller passed null.
        List<Pair<String, String>> target = data != null ? data : new ArrayList<Pair<String, String>>();
        try {
            new XMLFileReader()
                .setHandler(new PathValueListHandler(target, full, valueFilter))
                .read(filename, -1, validating);
            return target;
        } catch (Exception e) {
            throw new ICUException(filename, e);
        }
    }

    /**
     * Reads a file with all handler roles enabled, reporting to the given handler.
     *
     * @param filename
     *            file to read
     * @param validating
     *            if a validating parse is requested
     * @param simpleHandler
     *            receiver of the simplified events
     * @throws ICUException
     *             wrapping any exception thrown while reading
     */
    public static void processPathValues(String filename, boolean validating, SimpleHandler simpleHandler) {
        try {
            new XMLFileReader()
                .setHandler(simpleHandler)
                .read(filename, -1, validating);
        } catch (Exception e) {
            throw new ICUException(filename, e);
        }
    }

    /** Collects path/value pairs, and optionally comments, into a list. */
    static final class PathValueListHandler extends SimpleHandler {
        List<Pair<String, String>> data;
        boolean full;
        private Function<String, String> valueFilter;

        /**
         * @param data
         *            list to append to; if null, a new list is created
         * @param full
         *            if true, comments are recorded as well
         * @param valueFilter
         *            optional transformation of values; a null result drops the pair
         */
        public PathValueListHandler(List<Pair<String, String>> data, boolean full, Function<String, String> valueFilter) {
            super();
            this.data = data != null ? data : new ArrayList<>();
            this.full = full;
            this.valueFilter = valueFilter;
        }

        @Override
        public void handlePathValue(String path, String value) {
            if (valueFilter == null) {
                data.add(Pair.of(path, value));
            } else {
                String filteredValue = valueFilter.apply(value);
                if (filteredValue != null) {
                    data.add(Pair.of(path, filteredValue));
                }
            }
        }

        @Override
        public void handleComment(String path, String comment) {
            // "/" is the path while no element is open, so such comments are skipped.
            if (!full || path.equals("/")) {
                return;
            }
            data.add(Pair.of("!", comment));
        }
    }
}