• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  **********************************************************************
3  * Copyright (c) 2002-2004, International Business Machines
4  * Corporation and others.  All Rights Reserved.
5  **********************************************************************
6  * Author: Mark Davis
7  **********************************************************************
8  */
9 package org.unicode.cldr.util;
10 
11 import java.io.File;
12 import java.io.FileInputStream;
13 import java.io.IOException;
14 import java.io.InputStream;
15 import java.io.Reader;
16 import java.util.ArrayList;
17 import java.util.List;
18 import java.util.Stack;
19 
20 import org.xml.sax.Attributes;
21 import org.xml.sax.ContentHandler;
22 import org.xml.sax.ErrorHandler;
23 import org.xml.sax.InputSource;
24 import org.xml.sax.Locator;
25 import org.xml.sax.SAXException;
26 import org.xml.sax.SAXNotRecognizedException;
27 import org.xml.sax.SAXNotSupportedException;
28 import org.xml.sax.SAXParseException;
29 import org.xml.sax.XMLReader;
30 import org.xml.sax.ext.DeclHandler;
31 import org.xml.sax.ext.LexicalHandler;
32 import org.xml.sax.helpers.XMLReaderFactory;
33 
34 import com.google.common.base.Function;
35 import com.ibm.icu.util.ICUException;
36 import com.ibm.icu.util.ICUUncheckedIOException;
37 
38 /**
39  * Convenience class to make reading XML data files easier. The main method is read();
40  * This is meant for XML data files, so the contents of elements must either be all other elements, or
41  * just text. It is thus not suitable for XML files with MIXED content;
42  * all text content in a mixed element is discarded.
43  *
44  * @author davis
45  */
46 public class XMLFileReader {
47     static final boolean SHOW_ALL = false;
48     /**
49      * Handlers to use in read()
50      */
51     public static int CONTENT_HANDLER = 1, ERROR_HANDLER = 2, LEXICAL_HANDLER = 4, DECLARATION_HANDLER = 8;
52 
53     private MyContentHandler DEFAULT_DECLHANDLER = new MyContentHandler();
54     // TODO Add way to skip gathering value contents
55     // private ElementOnlyContentHandler ELEMENT_ONLY_DECLHANDLER = new ElementOnlyContentHandler();
56     private SimpleHandler simpleHandler;
57 
58     public static class SimpleHandler {
handlePathValue(String path, String value)59         public void handlePathValue(String path, String value) {
60         }
61 
handleComment(String path, String comment)62         public void handleComment(String path, String comment) {
63         }
64 
handleElementDecl(String name, String model)65         public void handleElementDecl(String name, String model) {
66         }
67 
handleAttributeDecl(String eName, String aName, String type, String mode, String value)68         public void handleAttributeDecl(String eName, String aName, String type, String mode, String value) {
69         }
70 
handleEndDtd()71         public void handleEndDtd() {
72         }
73 
handleStartDtd(String name, String publicId, String systemId)74         public void handleStartDtd(String name, String publicId, String systemId) {
75         }
76     }
77 
setHandler(SimpleHandler simpleHandler)78     public XMLFileReader setHandler(SimpleHandler simpleHandler) {
79         this.simpleHandler = simpleHandler;
80         return this;
81     }
82 
83     /**
84      * Read an XML file. The order of the elements matches what was in the file.
85      *
86      * @param fileName
87      *            file to open
88      * @param handlers
89      *            a set of values for the handlers to use, eg CONTENT_HANDLER | ERROR_HANDLER
90      * @param validating
91      *            if a validating parse is requested
92      * @return list of alternating values.
93      */
read(String fileName, int handlers, boolean validating)94     public XMLFileReader read(String fileName, int handlers, boolean validating) {
95         try (InputStream fis = new FileInputStream(fileName);
96             ) {
97             return read(fileName, new InputSource(fis), handlers, validating);
98         } catch (IOException e) {
99             File full = new File(fileName);
100             String fullName = fileName;
101             try {
102                 fullName = full.getCanonicalPath();
103             } catch (Exception IOException) {
104             }
105             throw (IllegalArgumentException) new IllegalArgumentException("Can't read " + fullName).initCause(e);
106         }
107     }
108 
109 
110     /**
111      * read from a CLDR resource
112      * @param fileName
113      * @param handlers
114      * @param validating
115      * @param fis
116      * @see CldrUtility#getInputStream(String)
117      * @return
118      */
readCLDRResource(String resName, int handlers, boolean validating)119     public XMLFileReader readCLDRResource(String resName, int handlers, boolean validating) {
120         try (InputStream inputStream = CldrUtility.getInputStream(resName)) {
121             return read(resName, new InputSource(inputStream), handlers, validating);
122         } catch (IOException e) {
123             throw new ICUUncheckedIOException(e);
124         }
125     }
126 
127     /**
128      * read from an arbitrary
129      * @param fileName
130      * @param handlers
131      * @param validating
132      * @param fis
133      * @see CldrUtility#getInputStream(String)
134      * @return
135      */
read(String resName, Class<?> callingClass, int handlers, boolean validating)136     public XMLFileReader read(String resName, Class<?> callingClass, int handlers, boolean validating) {
137         try (InputStream inputStream = CldrUtility.getInputStream(callingClass, resName)) {
138             return read(resName, new InputSource(inputStream), handlers, validating);
139         } catch (IOException e) {
140             throw new ICUUncheckedIOException(e);
141         }
142     }
143 
read(String systemID, Reader reader, int handlers, boolean validating)144     public XMLFileReader read(String systemID, Reader reader, int handlers, boolean validating) {
145         read(systemID, reader, handlers, validating, DEFAULT_DECLHANDLER.reset());
146         return this;
147     }
148 
read(String systemID, InputSource insrc, int handlers, boolean validating)149     public XMLFileReader read(String systemID, InputSource insrc, int handlers, boolean validating) {
150         read(systemID, insrc, handlers, validating, DEFAULT_DECLHANDLER.reset());
151         return this;
152     }
153 
read(String systemID, InputStream instr, int handlers, boolean validating, AllHandler allHandler)154     public static void read(String systemID, InputStream instr, int handlers, boolean validating, AllHandler allHandler) {
155         InputSource is = new InputSource(instr);
156         read(systemID, is, handlers, validating, allHandler);
157     }
158 
read(String systemID, Reader reader, int handlers, boolean validating, AllHandler allHandler)159     public static void read(String systemID, Reader reader, int handlers, boolean validating, AllHandler allHandler) {
160         InputSource is = new InputSource(reader);
161         read(systemID, is, handlers, validating, allHandler);
162     }
163 
read(String systemID, InputSource is, int handlers, boolean validating, AllHandler allHandler)164     public static void read(String systemID, InputSource is, int handlers, boolean validating, AllHandler allHandler) {
165         try {
166             XMLReader xmlReader = createXMLReader(handlers, validating, allHandler);
167             is.setSystemId(systemID);
168             try {
169                 xmlReader.parse(is);
170             } catch (AbortException e) {
171             } // ok
172         } catch (SAXParseException e) {
173             throw (IllegalArgumentException) new IllegalArgumentException("Can't read " + systemID + "\tline:\t"
174                 + e.getLineNumber()).initCause(e);
175         } catch (SAXException | IOException e) {
176             throw (IllegalArgumentException) new IllegalArgumentException("Can't read " + systemID).initCause(e);
177         }
178     }
179 
createXMLReader(int handlers, boolean validating, AllHandler allHandler)180     private static final XMLReader createXMLReader(int handlers, boolean validating, AllHandler allHandler) throws SAXNotRecognizedException, SAXNotSupportedException {
181         XMLReader xmlReader = createXMLReader(validating);
182         if ((handlers & CONTENT_HANDLER) != 0) {
183             xmlReader.setContentHandler(allHandler);
184         }
185         if ((handlers & ERROR_HANDLER) != 0) {
186             xmlReader.setErrorHandler(allHandler);
187         }
188         if ((handlers & LEXICAL_HANDLER) != 0) {
189             xmlReader.setProperty("http://xml.org/sax/properties/lexical-handler", allHandler);
190         }
191         if ((handlers & DECLARATION_HANDLER) != 0) {
192             xmlReader.setProperty("http://xml.org/sax/properties/declaration-handler", allHandler);
193         }
194         return xmlReader;
195     }
196 
197     public interface AllHandler extends ContentHandler, LexicalHandler, DeclHandler, ErrorHandler {
198 
199     }
200 
201 
202     /** Basis for handlers that provides for logging, with no actions on methods
203      */
204     static public class LoggingHandler implements AllHandler {
205         @Override
startDocument()206         public void startDocument() throws SAXException {
207             if (SHOW_ALL) Log.logln("startDocument");
208         }
209 
210         @Override
characters(char[] ch, int start, int length)211         public void characters(char[] ch, int start, int length) throws SAXException {
212             if (SHOW_ALL) Log.logln("characters");
213         }
214 
215         @Override
startElement(String namespaceURI, String localName, String qName, Attributes atts)216         public void startElement(String namespaceURI, String localName, String qName, Attributes atts)
217             throws SAXException {
218             if (SHOW_ALL) Log.logln("startElement");
219         }
220 
221         @Override
endElement(String namespaceURI, String localName, String qName)222         public void endElement(String namespaceURI, String localName, String qName) throws SAXException {
223             if (SHOW_ALL) Log.logln("endElement");
224         }
225 
226         @Override
startDTD(String name, String publicId, String systemId)227         public void startDTD(String name, String publicId, String systemId) throws SAXException {
228             if (SHOW_ALL) Log.logln("startDTD");
229         }
230 
231         @Override
endDTD()232         public void endDTD() throws SAXException {
233             if (SHOW_ALL) Log.logln("endDTD");
234         }
235 
236         @Override
comment(char[] ch, int start, int length)237         public void comment(char[] ch, int start, int length) throws SAXException {
238             if (SHOW_ALL) Log.logln(" comment " + new String(ch, start, length));
239         }
240 
241         @Override
elementDecl(String name, String model)242         public void elementDecl(String name, String model) throws SAXException {
243             if (SHOW_ALL) Log.logln("elementDecl");
244         }
245 
246         @Override
attributeDecl(String eName, String aName, String type, String mode, String value)247         public void attributeDecl(String eName, String aName, String type, String mode, String value)
248             throws SAXException {
249             if (SHOW_ALL) Log.logln("attributeDecl");
250         }
251 
252         @Override
ignorableWhitespace(char[] ch, int start, int length)253         public void ignorableWhitespace(char[] ch, int start, int length) throws SAXException {
254             if (SHOW_ALL) Log.logln("ignorableWhitespace length: " + length);
255         }
256 
257         @Override
endDocument()258         public void endDocument() throws SAXException {
259             if (SHOW_ALL) Log.logln("endDocument");
260         }
261 
262         @Override
internalEntityDecl(String name, String value)263         public void internalEntityDecl(String name, String value) throws SAXException {
264             if (SHOW_ALL) Log.logln("Internal Entity\t" + name + "\t" + value);
265         }
266 
267         @Override
externalEntityDecl(String name, String publicId, String systemId)268         public void externalEntityDecl(String name, String publicId, String systemId) throws SAXException {
269             if (SHOW_ALL) Log.logln("Internal Entity\t" + name + "\t" + publicId + "\t" + systemId);
270         }
271 
notationDecl(String name, String publicId, String systemId)272         public void notationDecl(String name, String publicId, String systemId) {
273             if (SHOW_ALL) Log.logln("notationDecl: " + name
274                 + ", " + publicId
275                 + ", " + systemId);
276         }
277 
278         @Override
processingInstruction(String target, String data)279         public void processingInstruction(String target, String data)
280             throws SAXException {
281             if (SHOW_ALL) Log.logln("processingInstruction: " + target + ", " + data);
282         }
283 
284         @Override
skippedEntity(String name)285         public void skippedEntity(String name)
286             throws SAXException {
287             if (SHOW_ALL) Log.logln("skippedEntity: " + name);
288         }
289 
unparsedEntityDecl(String name, String publicId, String systemId, String notationName)290         public void unparsedEntityDecl(String name, String publicId,
291             String systemId, String notationName) {
292             if (SHOW_ALL) Log.logln("unparsedEntityDecl: " + name
293                 + ", " + publicId
294                 + ", " + systemId
295                 + ", " + notationName);
296         }
297 
298         @Override
setDocumentLocator(Locator locator)299         public void setDocumentLocator(Locator locator) {
300             if (SHOW_ALL) Log.logln("setDocumentLocator Locator " + locator);
301         }
302 
303         @Override
startPrefixMapping(String prefix, String uri)304         public void startPrefixMapping(String prefix, String uri) throws SAXException {
305             if (SHOW_ALL) Log.logln("startPrefixMapping prefix: " + prefix +
306                 ", uri: " + uri);
307         }
308 
309         @Override
endPrefixMapping(String prefix)310         public void endPrefixMapping(String prefix) throws SAXException {
311             if (SHOW_ALL) Log.logln("endPrefixMapping prefix: " + prefix);
312         }
313 
314         @Override
startEntity(String name)315         public void startEntity(String name) throws SAXException {
316             if (SHOW_ALL) Log.logln("startEntity name: " + name);
317         }
318 
319         @Override
endEntity(String name)320         public void endEntity(String name) throws SAXException {
321             if (SHOW_ALL) Log.logln("endEntity name: " + name);
322         }
323 
324         @Override
startCDATA()325         public void startCDATA() throws SAXException {
326             if (SHOW_ALL) Log.logln("startCDATA");
327         }
328 
329         @Override
endCDATA()330         public void endCDATA() throws SAXException {
331             if (SHOW_ALL) Log.logln("endCDATA");
332         }
333 
334         /*
335          * (non-Javadoc)
336          *
337          * @see org.xml.sax.ErrorHandler#error(org.xml.sax.SAXParseException)
338          */
339         @Override
error(SAXParseException exception)340         public void error(SAXParseException exception) throws SAXException {
341             if (SHOW_ALL) Log.logln("error: " + showSAX(exception));
342             throw exception;
343         }
344 
345         /*
346          * (non-Javadoc)
347          *
348          * @see org.xml.sax.ErrorHandler#fatalError(org.xml.sax.SAXParseException)
349          */
350         @Override
fatalError(SAXParseException exception)351         public void fatalError(SAXParseException exception) throws SAXException {
352             if (SHOW_ALL) Log.logln("fatalError: " + showSAX(exception));
353             throw exception;
354         }
355 
356         /*
357          * (non-Javadoc)
358          *
359          * @see org.xml.sax.ErrorHandler#warning(org.xml.sax.SAXParseException)
360          */
361         @Override
warning(SAXParseException exception)362         public void warning(SAXParseException exception) throws SAXException {
363             if (SHOW_ALL) Log.logln("warning: " + showSAX(exception));
364             throw exception;
365         }
366 
367     }
368 
369     public class MyContentHandler extends LoggingHandler {
370         StringBuffer chars = new StringBuffer();
371         StringBuffer commentChars = new StringBuffer();
372         Stack<String> startElements = new Stack<>();
373         StringBuffer tempPath = new StringBuffer();
374         boolean lastIsStart = false;
375 
reset()376         public MyContentHandler reset() {
377             chars.setLength(0);
378             tempPath = new StringBuffer("/");
379             startElements.clear();
380             startElements.push("/");
381             return this;
382         }
383 
384         @Override
characters(char[] ch, int start, int length)385         public void characters(char[] ch, int start, int length) throws SAXException {
386             if (lastIsStart) chars.append(ch, start, length);
387         }
388 
389         @Override
startElement(String namespaceURI, String localName, String qName, Attributes atts)390         public void startElement(String namespaceURI, String localName, String qName, Attributes atts)
391             throws SAXException {
392             tempPath.setLength(0);
393             tempPath.append(startElements.peek()).append('/').append(qName);
394             for (int i = 0; i < atts.getLength(); ++i) {
395                 tempPath.append("[@").append(atts.getQName(i)).append("=\"").append(atts.getValue(i).replace('"', '\'')).append("\"]");
396             }
397             startElements.push(tempPath.toString());
398             chars.setLength(0); // clear garbage
399             lastIsStart = true;
400         }
401 
402         @Override
endElement(String namespaceURI, String localName, String qName)403         public void endElement(String namespaceURI, String localName, String qName) throws SAXException {
404             String startElement = startElements.pop();
405             if (lastIsStart) {
406                 // System.out.println(startElement + ":" + chars);
407                 simpleHandler.handlePathValue(startElement, chars.toString());
408             }
409             chars.setLength(0);
410             lastIsStart = false;
411         }
412 
413         @Override
startDTD(String name, String publicId, String systemId)414         public void startDTD(String name, String publicId, String systemId) throws SAXException {
415             if (SHOW_ALL) Log.logln("startDTD name: " + name
416                 + ", publicId: " + publicId
417                 + ", systemId: " + systemId);
418             simpleHandler.handleStartDtd(name, publicId, systemId);
419         }
420 
421         @Override
endDTD()422         public void endDTD() throws SAXException {
423             if (SHOW_ALL) Log.logln("endDTD");
424             simpleHandler.handleEndDtd();
425         }
426 
427         @Override
comment(char[] ch, int start, int length)428         public void comment(char[] ch, int start, int length) throws SAXException {
429             if (SHOW_ALL) Log.logln(" comment " + new String(ch, start, length));
430             commentChars.append(ch, start, length);
431             simpleHandler.handleComment(startElements.peek(), commentChars.toString());
432             commentChars.setLength(0);
433         }
434 
435         @Override
elementDecl(String name, String model)436         public void elementDecl(String name, String model) throws SAXException {
437             simpleHandler.handleElementDecl(name, model);
438         }
439 
440         @Override
attributeDecl(String eName, String aName, String type, String mode, String value)441         public void attributeDecl(String eName, String aName, String type, String mode, String value)
442             throws SAXException {
443             simpleHandler.handleAttributeDecl(eName, aName, type, mode, value);
444         }
445 
446     }
447 
448     static final class AbortException extends RuntimeException {
449         private static final long serialVersionUID = 1L;
450     }
451 
452     /**
453      * Show a SAX exception in a readable form.
454      */
showSAX(SAXParseException exception)455     public static String showSAX(SAXParseException exception) {
456         return exception.getMessage()
457             + ";\t SystemID: " + exception.getSystemId()
458             + ";\t PublicID: " + exception.getPublicId()
459             + ";\t LineNumber: " + exception.getLineNumber()
460             + ";\t ColumnNumber: " + exception.getColumnNumber();
461     }
462 
createXMLReader(boolean validating)463     public static XMLReader createXMLReader(boolean validating) {
464         // weiv 07/20/2007: The laundry list below is somewhat obsolete
465         // I have moved the system's default parser (instantiated when "" is
466         // passed) to the top, so that we will always use that. I have also
467         // removed "org.apache.crimson.parser.XMLReaderImpl" as this one gets
468         // confused regarding UTF-8 encoding name.
469         String[] testList = {
470             System.getProperty("CLDR_DEFAULT_SAX_PARSER", ""), // defaults to "", system default.
471             "org.apache.xerces.parsers.SAXParser",
472             "gnu.xml.aelfred2.XmlReader",
473             "com.bluecast.xml.Piccolo",
474             "oracle.xml.parser.v2.SAXParser"
475         };
476         XMLReader result = null;
477         for (int i = 0; i < testList.length; ++i) {
478             try {
479                 result = (testList[i].length() != 0)
480                     ? XMLReaderFactory.createXMLReader(testList[i])
481                         : XMLReaderFactory.createXMLReader();
482                     result.setFeature("http://xml.org/sax/features/validation", validating);
483                     break;
484             } catch (SAXException e1) {
485             }
486         }
487         if (result == null)
488             throw new NoClassDefFoundError("No SAX parser is available, or unable to set validation correctly");
489         return result;
490     }
491 
492     static final class DebuggingInputStream extends InputStream {
493         InputStream contents;
494 
495         @Override
close()496         public void close() throws IOException {
497             contents.close();
498         }
499 
DebuggingInputStream(InputStream fis)500         public DebuggingInputStream(InputStream fis) {
501             contents = fis;
502         }
503 
504         @Override
read()505         public int read() throws IOException {
506             int x = contents.read();
507             System.out.println(Integer.toHexString(x) + ",");
508             return x;
509         }
510     }
511 
loadPathValues(String filename, List<Pair<String, String>> data, boolean validating)512     public static List<Pair<String, String>> loadPathValues(String filename, List<Pair<String, String>> data, boolean validating) {
513         return loadPathValues(filename, data, validating, false);
514     }
515 
loadPathValues(String filename, List<Pair<String, String>> data, boolean validating, boolean full)516     public static List<Pair<String, String>> loadPathValues(String filename, List<Pair<String, String>> data, boolean validating, boolean full) {
517         return loadPathValues(filename, data, validating, full, null);
518     }
519 
loadPathValues(String filename, List<Pair<String, String>> data, boolean validating, boolean full, Function<String, String> valueFilter)520     public static List<Pair<String, String>> loadPathValues(String filename, List<Pair<String, String>> data, boolean validating, boolean full,
521         Function<String, String> valueFilter) {
522         try {
523             new XMLFileReader()
524             .setHandler(new PathValueListHandler(data, full, valueFilter))
525             .read(filename, -1, validating);
526             return data;
527         } catch (Exception e) {
528             throw new ICUException(filename, e);
529         }
530     }
531 
processPathValues(String filename, boolean validating, SimpleHandler simpleHandler)532     public static void processPathValues(String filename, boolean validating, SimpleHandler simpleHandler) {
533         try {
534             new XMLFileReader()
535             .setHandler(simpleHandler)
536             .read(filename, -1, validating);
537         } catch (Exception e) {
538             throw new ICUException(filename, e);
539         }
540     }
541 
542     static final class PathValueListHandler extends SimpleHandler {
543         List<Pair<String, String>> data;
544         boolean full;
545         private Function<String, String> valueFilter;
546 
PathValueListHandler(List<Pair<String, String>> data, boolean full, Function<String, String> valueFilter)547         public PathValueListHandler(List<Pair<String, String>> data, boolean full, Function<String, String> valueFilter) {
548             super();
549             this.data = data != null ? data : new ArrayList<>();
550             this.full = full;
551             this.valueFilter = valueFilter;
552         }
553 
554         @Override
handlePathValue(String path, String value)555         public void handlePathValue(String path, String value) {
556             if (valueFilter == null) {
557                 data.add(Pair.of(path, value));
558             } else {
559                 String filteredValue = valueFilter.apply(value);
560                 if (filteredValue != null) {
561                     data.add(Pair.of(path, filteredValue));
562                 }
563             }
564         }
565 
566         @Override
handleComment(String path, String comment)567         public void handleComment(String path, String comment) {
568             if (!full || path.equals("/")) {
569                 return;
570             }
571             data.add(Pair.of("!", comment));
572         }
573     }
574 }
575