• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  **********************************************************************
3  * Copyright (c) 2002-2004, International Business Machines
4  * Corporation and others.  All Rights Reserved.
5  **********************************************************************
6  * Author: Mark Davis
7  **********************************************************************
8  */
9 package org.unicode.cldr.util;
10 
11 import java.io.FileInputStream;
12 import java.io.IOException;
13 import java.io.InputStream;
14 import java.io.Reader;
15 import java.util.ArrayList;
16 import java.util.List;
17 import java.util.Stack;
18 
19 import org.xml.sax.Attributes;
20 import org.xml.sax.ContentHandler;
21 import org.xml.sax.ErrorHandler;
22 import org.xml.sax.InputSource;
23 import org.xml.sax.Locator;
24 import org.xml.sax.SAXException;
25 import org.xml.sax.SAXNotRecognizedException;
26 import org.xml.sax.SAXNotSupportedException;
27 import org.xml.sax.SAXParseException;
28 import org.xml.sax.XMLReader;
29 import org.xml.sax.ext.DeclHandler;
30 import org.xml.sax.ext.LexicalHandler;
31 import org.xml.sax.helpers.XMLReaderFactory;
32 
33 import com.google.common.base.Function;
34 import com.ibm.icu.util.ICUException;
35 import com.ibm.icu.util.ICUUncheckedIOException;
36 
37 /**
38  * Convenience class to make reading XML data files easier. The main method is read();
39  * This is meant for XML data files, so the contents of elements must either be all other elements, or
40  * just text. It is thus not suitable for XML files with MIXED content;
41  * all text content in a mixed element is discarded.
42  *
43  * @author davis
44  */
45 public class XMLFileReader {
46     static final boolean SHOW_ALL = false;
47     /**
48      * Handlers to use in read()
49      */
50     public static int CONTENT_HANDLER = 1, ERROR_HANDLER = 2, LEXICAL_HANDLER = 4, DECLARATION_HANDLER = 8;
51 
52     private MyContentHandler DEFAULT_DECLHANDLER = new MyContentHandler();
53     // TODO Add way to skip gathering value contents
54     // private ElementOnlyContentHandler ELEMENT_ONLY_DECLHANDLER = new ElementOnlyContentHandler();
55     private SimpleHandler simpleHandler;
56 
57     public static class SimpleHandler {
handlePathValue(String path, String value)58         public void handlePathValue(String path, String value) {
59         }
60 
handleComment(String path, String comment)61         public void handleComment(String path, String comment) {
62         }
63 
handleElementDecl(String name, String model)64         public void handleElementDecl(String name, String model) {
65         }
66 
handleAttributeDecl(String eName, String aName, String type, String mode, String value)67         public void handleAttributeDecl(String eName, String aName, String type, String mode, String value) {
68         }
69 
handleEndDtd()70         public void handleEndDtd() {
71         }
72 
handleStartDtd(String name, String publicId, String systemId)73         public void handleStartDtd(String name, String publicId, String systemId) {
74         }
75     }
76 
setHandler(SimpleHandler simpleHandler)77     public XMLFileReader setHandler(SimpleHandler simpleHandler) {
78         this.simpleHandler = simpleHandler;
79         return this;
80     }
81 
82     /**
83      * Read an XML file. The order of the elements matches what was in the file.
84      *
85      * @param fileName
86      *            file to open
87      * @param handlers
88      *            a set of values for the handlers to use, eg CONTENT_HANDLER | ERROR_HANDLER
89      * @param validating
90      *            if a validating parse is requested
91      * @return list of alternating values.
92      */
read(String fileName, int handlers, boolean validating)93     public XMLFileReader read(String fileName, int handlers, boolean validating) {
94         try (InputStream fis = new FileInputStream(fileName);
95             ) {
96             return read(fileName, new InputSource(fis), handlers, validating);
97         } catch (IOException e) {
98             throw (IllegalArgumentException) new IllegalArgumentException("Can't read " + fileName).initCause(e);
99         }
100     }
101 
102 
103     /**
104      * read from a CLDR resource
105      * @param fileName
106      * @param handlers
107      * @param validating
108      * @param fis
109      * @see CldrUtility#getInputStream(String)
110      * @return
111      */
readCLDRResource(String resName, int handlers, boolean validating)112     public XMLFileReader readCLDRResource(String resName, int handlers, boolean validating) {
113         try (InputStream inputStream = CldrUtility.getInputStream(resName)) {
114             return read(resName, new InputSource(inputStream), handlers, validating);
115         } catch (IOException e) {
116             throw new ICUUncheckedIOException(e);
117         }
118     }
119 
120     /**
121      * read from an arbitrary
122      * @param fileName
123      * @param handlers
124      * @param validating
125      * @param fis
126      * @see CldrUtility#getInputStream(String)
127      * @return
128      */
read(String resName, Class<?> callingClass, int handlers, boolean validating)129     public XMLFileReader read(String resName, Class<?> callingClass, int handlers, boolean validating) {
130         try (InputStream inputStream = CldrUtility.getInputStream(callingClass, resName)) {
131             return read(resName, new InputSource(inputStream), handlers, validating);
132         } catch (IOException e) {
133             throw new ICUUncheckedIOException(e);
134         }
135     }
136 
read(String systemID, Reader reader, int handlers, boolean validating)137     public XMLFileReader read(String systemID, Reader reader, int handlers, boolean validating) {
138         read(systemID, reader, handlers, validating, DEFAULT_DECLHANDLER.reset());
139         return this;
140     }
141 
read(String systemID, InputSource insrc, int handlers, boolean validating)142     public XMLFileReader read(String systemID, InputSource insrc, int handlers, boolean validating) {
143         read(systemID, insrc, handlers, validating, DEFAULT_DECLHANDLER.reset());
144         return this;
145     }
146 
read(String systemID, InputStream instr, int handlers, boolean validating, AllHandler allHandler)147     public static void read(String systemID, InputStream instr, int handlers, boolean validating, AllHandler allHandler) {
148         InputSource is = new InputSource(instr);
149         read(systemID, is, handlers, validating, allHandler);
150     }
151 
read(String systemID, Reader reader, int handlers, boolean validating, AllHandler allHandler)152     public static void read(String systemID, Reader reader, int handlers, boolean validating, AllHandler allHandler) {
153         InputSource is = new InputSource(reader);
154         read(systemID, is, handlers, validating, allHandler);
155     }
156 
read(String systemID, InputSource is, int handlers, boolean validating, AllHandler allHandler)157     public static void read(String systemID, InputSource is, int handlers, boolean validating, AllHandler allHandler) {
158         try {
159             XMLReader xmlReader = createXMLReader(handlers, validating, allHandler);
160             is.setSystemId(systemID);
161             try {
162                 xmlReader.parse(is);
163             } catch (AbortException e) {
164             } // ok
165         } catch (SAXParseException e) {
166             throw (IllegalArgumentException) new IllegalArgumentException("Can't read " + systemID + "\tline:\t"
167                 + e.getLineNumber()).initCause(e);
168         } catch (SAXException | IOException e) {
169             throw (IllegalArgumentException) new IllegalArgumentException("Can't read " + systemID).initCause(e);
170         }
171     }
172 
createXMLReader(int handlers, boolean validating, AllHandler allHandler)173     private static final XMLReader createXMLReader(int handlers, boolean validating, AllHandler allHandler) throws SAXNotRecognizedException, SAXNotSupportedException {
174         XMLReader xmlReader = createXMLReader(validating);
175         if ((handlers & CONTENT_HANDLER) != 0) {
176             xmlReader.setContentHandler(allHandler);
177         }
178         if ((handlers & ERROR_HANDLER) != 0) {
179             xmlReader.setErrorHandler(allHandler);
180         }
181         if ((handlers & LEXICAL_HANDLER) != 0) {
182             xmlReader.setProperty("http://xml.org/sax/properties/lexical-handler", allHandler);
183         }
184         if ((handlers & DECLARATION_HANDLER) != 0) {
185             xmlReader.setProperty("http://xml.org/sax/properties/declaration-handler", allHandler);
186         }
187         return xmlReader;
188     }
189 
190     public interface AllHandler extends ContentHandler, LexicalHandler, DeclHandler, ErrorHandler {
191 
192     }
193 
194 
195     /** Basis for handlers that provides for logging, with no actions on methods
196      */
197     static public class LoggingHandler implements AllHandler {
198         @Override
startDocument()199         public void startDocument() throws SAXException {
200             if (SHOW_ALL) Log.logln("startDocument");
201         }
202 
203         @Override
characters(char[] ch, int start, int length)204         public void characters(char[] ch, int start, int length) throws SAXException {
205             if (SHOW_ALL) Log.logln("characters");
206         }
207 
208         @Override
startElement(String namespaceURI, String localName, String qName, Attributes atts)209         public void startElement(String namespaceURI, String localName, String qName, Attributes atts)
210             throws SAXException {
211             if (SHOW_ALL) Log.logln("startElement");
212         }
213 
214         @Override
endElement(String namespaceURI, String localName, String qName)215         public void endElement(String namespaceURI, String localName, String qName) throws SAXException {
216             if (SHOW_ALL) Log.logln("endElement");
217         }
218 
219         @Override
startDTD(String name, String publicId, String systemId)220         public void startDTD(String name, String publicId, String systemId) throws SAXException {
221             if (SHOW_ALL) Log.logln("startDTD");
222         }
223 
224         @Override
endDTD()225         public void endDTD() throws SAXException {
226             if (SHOW_ALL) Log.logln("endDTD");
227         }
228 
229         @Override
comment(char[] ch, int start, int length)230         public void comment(char[] ch, int start, int length) throws SAXException {
231             if (SHOW_ALL) Log.logln(" comment " + new String(ch, start, length));
232         }
233 
234         @Override
elementDecl(String name, String model)235         public void elementDecl(String name, String model) throws SAXException {
236             if (SHOW_ALL) Log.logln("elementDecl");
237         }
238 
239         @Override
attributeDecl(String eName, String aName, String type, String mode, String value)240         public void attributeDecl(String eName, String aName, String type, String mode, String value)
241             throws SAXException {
242             if (SHOW_ALL) Log.logln("attributeDecl");
243         }
244 
245         @Override
ignorableWhitespace(char[] ch, int start, int length)246         public void ignorableWhitespace(char[] ch, int start, int length) throws SAXException {
247             if (SHOW_ALL) Log.logln("ignorableWhitespace length: " + length);
248         }
249 
250         @Override
endDocument()251         public void endDocument() throws SAXException {
252             if (SHOW_ALL) Log.logln("endDocument");
253         }
254 
255         @Override
internalEntityDecl(String name, String value)256         public void internalEntityDecl(String name, String value) throws SAXException {
257             if (SHOW_ALL) Log.logln("Internal Entity\t" + name + "\t" + value);
258         }
259 
260         @Override
externalEntityDecl(String name, String publicId, String systemId)261         public void externalEntityDecl(String name, String publicId, String systemId) throws SAXException {
262             if (SHOW_ALL) Log.logln("Internal Entity\t" + name + "\t" + publicId + "\t" + systemId);
263         }
264 
notationDecl(String name, String publicId, String systemId)265         public void notationDecl(String name, String publicId, String systemId) {
266             if (SHOW_ALL) Log.logln("notationDecl: " + name
267                 + ", " + publicId
268                 + ", " + systemId);
269         }
270 
271         @Override
processingInstruction(String target, String data)272         public void processingInstruction(String target, String data)
273             throws SAXException {
274             if (SHOW_ALL) Log.logln("processingInstruction: " + target + ", " + data);
275         }
276 
277         @Override
skippedEntity(String name)278         public void skippedEntity(String name)
279             throws SAXException {
280             if (SHOW_ALL) Log.logln("skippedEntity: " + name);
281         }
282 
unparsedEntityDecl(String name, String publicId, String systemId, String notationName)283         public void unparsedEntityDecl(String name, String publicId,
284             String systemId, String notationName) {
285             if (SHOW_ALL) Log.logln("unparsedEntityDecl: " + name
286                 + ", " + publicId
287                 + ", " + systemId
288                 + ", " + notationName);
289         }
290 
291         @Override
setDocumentLocator(Locator locator)292         public void setDocumentLocator(Locator locator) {
293             if (SHOW_ALL) Log.logln("setDocumentLocator Locator " + locator);
294         }
295 
296         @Override
startPrefixMapping(String prefix, String uri)297         public void startPrefixMapping(String prefix, String uri) throws SAXException {
298             if (SHOW_ALL) Log.logln("startPrefixMapping prefix: " + prefix +
299                 ", uri: " + uri);
300         }
301 
302         @Override
endPrefixMapping(String prefix)303         public void endPrefixMapping(String prefix) throws SAXException {
304             if (SHOW_ALL) Log.logln("endPrefixMapping prefix: " + prefix);
305         }
306 
307         @Override
startEntity(String name)308         public void startEntity(String name) throws SAXException {
309             if (SHOW_ALL) Log.logln("startEntity name: " + name);
310         }
311 
312         @Override
endEntity(String name)313         public void endEntity(String name) throws SAXException {
314             if (SHOW_ALL) Log.logln("endEntity name: " + name);
315         }
316 
317         @Override
startCDATA()318         public void startCDATA() throws SAXException {
319             if (SHOW_ALL) Log.logln("startCDATA");
320         }
321 
322         @Override
endCDATA()323         public void endCDATA() throws SAXException {
324             if (SHOW_ALL) Log.logln("endCDATA");
325         }
326 
327         /*
328          * (non-Javadoc)
329          *
330          * @see org.xml.sax.ErrorHandler#error(org.xml.sax.SAXParseException)
331          */
332         @Override
error(SAXParseException exception)333         public void error(SAXParseException exception) throws SAXException {
334             if (SHOW_ALL) Log.logln("error: " + showSAX(exception));
335             throw exception;
336         }
337 
338         /*
339          * (non-Javadoc)
340          *
341          * @see org.xml.sax.ErrorHandler#fatalError(org.xml.sax.SAXParseException)
342          */
343         @Override
fatalError(SAXParseException exception)344         public void fatalError(SAXParseException exception) throws SAXException {
345             if (SHOW_ALL) Log.logln("fatalError: " + showSAX(exception));
346             throw exception;
347         }
348 
349         /*
350          * (non-Javadoc)
351          *
352          * @see org.xml.sax.ErrorHandler#warning(org.xml.sax.SAXParseException)
353          */
354         @Override
warning(SAXParseException exception)355         public void warning(SAXParseException exception) throws SAXException {
356             if (SHOW_ALL) Log.logln("warning: " + showSAX(exception));
357             throw exception;
358         }
359 
360     }
361 
362     public class MyContentHandler extends LoggingHandler {
363         StringBuffer chars = new StringBuffer();
364         StringBuffer commentChars = new StringBuffer();
365         Stack<String> startElements = new Stack<>();
366         StringBuffer tempPath = new StringBuffer();
367         boolean lastIsStart = false;
368 
reset()369         public MyContentHandler reset() {
370             chars.setLength(0);
371             tempPath = new StringBuffer("/");
372             startElements.clear();
373             startElements.push("/");
374             return this;
375         }
376 
377         @Override
characters(char[] ch, int start, int length)378         public void characters(char[] ch, int start, int length) throws SAXException {
379             if (lastIsStart) chars.append(ch, start, length);
380         }
381 
382         @Override
startElement(String namespaceURI, String localName, String qName, Attributes atts)383         public void startElement(String namespaceURI, String localName, String qName, Attributes atts)
384             throws SAXException {
385             tempPath.setLength(0);
386             tempPath.append(startElements.peek()).append('/').append(qName);
387             for (int i = 0; i < atts.getLength(); ++i) {
388                 tempPath.append("[@").append(atts.getQName(i)).append("=\"").append(atts.getValue(i).replace('"', '\'')).append("\"]");
389             }
390             startElements.push(tempPath.toString());
391             chars.setLength(0); // clear garbage
392             lastIsStart = true;
393         }
394 
395         @Override
endElement(String namespaceURI, String localName, String qName)396         public void endElement(String namespaceURI, String localName, String qName) throws SAXException {
397             String startElement = startElements.pop();
398             if (lastIsStart) {
399                 // System.out.println(startElement + ":" + chars);
400                 simpleHandler.handlePathValue(startElement, chars.toString());
401             }
402             chars.setLength(0);
403             lastIsStart = false;
404         }
405 
406         @Override
startDTD(String name, String publicId, String systemId)407         public void startDTD(String name, String publicId, String systemId) throws SAXException {
408             if (SHOW_ALL) Log.logln("startDTD name: " + name
409                 + ", publicId: " + publicId
410                 + ", systemId: " + systemId);
411             simpleHandler.handleStartDtd(name, publicId, systemId);
412         }
413 
414         @Override
endDTD()415         public void endDTD() throws SAXException {
416             if (SHOW_ALL) Log.logln("endDTD");
417             simpleHandler.handleEndDtd();
418         }
419 
420         @Override
comment(char[] ch, int start, int length)421         public void comment(char[] ch, int start, int length) throws SAXException {
422             if (SHOW_ALL) Log.logln(" comment " + new String(ch, start, length));
423             commentChars.append(ch, start, length);
424             simpleHandler.handleComment(startElements.peek(), commentChars.toString());
425             commentChars.setLength(0);
426         }
427 
428         @Override
elementDecl(String name, String model)429         public void elementDecl(String name, String model) throws SAXException {
430             simpleHandler.handleElementDecl(name, model);
431         }
432 
433         @Override
attributeDecl(String eName, String aName, String type, String mode, String value)434         public void attributeDecl(String eName, String aName, String type, String mode, String value)
435             throws SAXException {
436             simpleHandler.handleAttributeDecl(eName, aName, type, mode, value);
437         }
438 
439     }
440 
441     static final class AbortException extends RuntimeException {
442         private static final long serialVersionUID = 1L;
443     }
444 
445     /**
446      * Show a SAX exception in a readable form.
447      */
showSAX(SAXParseException exception)448     public static String showSAX(SAXParseException exception) {
449         return exception.getMessage()
450             + ";\t SystemID: " + exception.getSystemId()
451             + ";\t PublicID: " + exception.getPublicId()
452             + ";\t LineNumber: " + exception.getLineNumber()
453             + ";\t ColumnNumber: " + exception.getColumnNumber();
454     }
455 
createXMLReader(boolean validating)456     public static XMLReader createXMLReader(boolean validating) {
457         // weiv 07/20/2007: The laundry list below is somewhat obsolete
458         // I have moved the system's default parser (instantiated when "" is
459         // passed) to the top, so that we will always use that. I have also
460         // removed "org.apache.crimson.parser.XMLReaderImpl" as this one gets
461         // confused regarding UTF-8 encoding name.
462         String[] testList = {
463             System.getProperty("CLDR_DEFAULT_SAX_PARSER", ""), // defaults to "", system default.
464             "org.apache.xerces.parsers.SAXParser",
465             "gnu.xml.aelfred2.XmlReader",
466             "com.bluecast.xml.Piccolo",
467             "oracle.xml.parser.v2.SAXParser"
468         };
469         XMLReader result = null;
470         for (int i = 0; i < testList.length; ++i) {
471             try {
472                 result = (testList[i].length() != 0)
473                     ? XMLReaderFactory.createXMLReader(testList[i])
474                         : XMLReaderFactory.createXMLReader();
475                     result.setFeature("http://xml.org/sax/features/validation", validating);
476                     break;
477             } catch (SAXException e1) {
478             }
479         }
480         if (result == null)
481             throw new NoClassDefFoundError("No SAX parser is available, or unable to set validation correctly");
482         return result;
483     }
484 
485     static final class DebuggingInputStream extends InputStream {
486         InputStream contents;
487 
488         @Override
close()489         public void close() throws IOException {
490             contents.close();
491         }
492 
DebuggingInputStream(InputStream fis)493         public DebuggingInputStream(InputStream fis) {
494             contents = fis;
495         }
496 
497         @Override
read()498         public int read() throws IOException {
499             int x = contents.read();
500             System.out.println(Integer.toHexString(x) + ",");
501             return x;
502         }
503     }
504 
loadPathValues(String filename, List<Pair<String, String>> data, boolean validating)505     public static List<Pair<String, String>> loadPathValues(String filename, List<Pair<String, String>> data, boolean validating) {
506         return loadPathValues(filename, data, validating, false);
507     }
508 
loadPathValues(String filename, List<Pair<String, String>> data, boolean validating, boolean full)509     public static List<Pair<String, String>> loadPathValues(String filename, List<Pair<String, String>> data, boolean validating, boolean full) {
510         return loadPathValues(filename, data, validating, full, null);
511     }
512 
loadPathValues(String filename, List<Pair<String, String>> data, boolean validating, boolean full, Function<String, String> valueFilter)513     public static List<Pair<String, String>> loadPathValues(String filename, List<Pair<String, String>> data, boolean validating, boolean full,
514         Function<String, String> valueFilter) {
515         try {
516             new XMLFileReader()
517             .setHandler(new PathValueListHandler(data, full, valueFilter))
518             .read(filename, -1, validating);
519             return data;
520         } catch (Exception e) {
521             throw new ICUException(filename, e);
522         }
523     }
524 
processPathValues(String filename, boolean validating, SimpleHandler simpleHandler)525     public static void processPathValues(String filename, boolean validating, SimpleHandler simpleHandler) {
526         try {
527             new XMLFileReader()
528             .setHandler(simpleHandler)
529             .read(filename, -1, validating);
530         } catch (Exception e) {
531             throw new ICUException(filename, e);
532         }
533     }
534 
535     static final class PathValueListHandler extends SimpleHandler {
536         List<Pair<String, String>> data;
537         boolean full;
538         private Function<String, String> valueFilter;
539 
PathValueListHandler(List<Pair<String, String>> data, boolean full, Function<String, String> valueFilter)540         public PathValueListHandler(List<Pair<String, String>> data, boolean full, Function<String, String> valueFilter) {
541             super();
542             this.data = data != null ? data : new ArrayList<>();
543             this.full = full;
544             this.valueFilter = valueFilter;
545         }
546 
547         @Override
handlePathValue(String path, String value)548         public void handlePathValue(String path, String value) {
549             if (valueFilter == null) {
550                 data.add(Pair.of(path, value));
551             } else {
552                 String filteredValue = valueFilter.apply(value);
553                 if (filteredValue != null) {
554                     data.add(Pair.of(path, filteredValue));
555                 }
556             }
557         }
558 
559         @Override
handleComment(String path, String comment)560         public void handleComment(String path, String comment) {
561             if (!full || path.equals("/")) {
562                 return;
563             }
564             data.add(Pair.of("!", comment));
565         }
566     }
567 }
568