• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright Simon Pepping 2009
3  *
4  * The copyright owner licenses this file to You under the Apache License, Version 2.0
5  * (the "License"); you may not use this file except in compliance with
6  * the License.  You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 /* $Id$ */
18 
19 package org.tug.texhyphen;
20 
21 import java.io.File;
22 import java.io.FileInputStream;
23 import java.io.FileNotFoundException;
24 import java.io.FilenameFilter;
25 import java.io.IOException;
26 import java.io.InputStream;
27 import java.net.MalformedURLException;
28 import java.net.URI;
29 import java.net.URISyntaxException;
30 import java.net.URL;
31 import java.util.ArrayList;
32 import java.util.Arrays;
33 import java.util.Collection;
34 import java.util.HashMap;
35 import java.util.Map;
36 
37 import javax.xml.parsers.ParserConfigurationException;
38 import javax.xml.parsers.SAXParser;
39 import javax.xml.parsers.SAXParserFactory;
40 import javax.xml.transform.Result;
41 import javax.xml.transform.Source;
42 import javax.xml.transform.Transformer;
43 import javax.xml.transform.TransformerException;
44 import javax.xml.transform.TransformerFactory;
45 import javax.xml.transform.sax.SAXTransformerFactory;
46 import javax.xml.transform.sax.TransformerHandler;
47 import javax.xml.transform.stream.StreamResult;
48 import javax.xml.transform.stream.StreamSource;
49 
50 import org.xml.sax.Attributes;
51 import org.xml.sax.InputSource;
52 import org.xml.sax.SAXException;
53 import org.xml.sax.XMLReader;
54 import org.xml.sax.helpers.DefaultHandler;
55 
56 /**
57  * Convert modern UTF8 TeX hyphenation patterns to XML format
58  */
59 public final class ConvertTeXPattern {
60 
convert(String[] texPatterns, String outfilePath, boolean useStylesheet, boolean useLanguagedata)61     public static void convert(String[] texPatterns, String outfilePath, boolean useStylesheet,
62                                boolean useLanguagedata)
63     throws IOException, TransformerException, SAXException, URISyntaxException,
64     ParserConfigurationException, CodeMappingException {
65         checkCodeMapping();
66         Collection<String> languages = codeMapping.keySet();
67         convert(texPatterns, outfilePath, useStylesheet, languages);
68     }
69 
convert(String[] texPatterns, String outfilePath, boolean useStylesheet)70     public static void convert(String[] texPatterns, String outfilePath, boolean useStylesheet)
71     throws IOException, TransformerException, SAXException, URISyntaxException,
72     CodeMappingException {
73         convert(texPatterns, outfilePath, useStylesheet, null);
74     }
75 
76     /**
77      * infile outfile
78      * indir outdir (file protocol only)
79      * infiles outdir
80      * file and http protocols allowed
81      *
82      * @param texPatternUri
83      * @param outfilePath
84      * @param useStylesheet
85      * @param texcodes filter of requested tex codes; is allowed to be null
86      * @throws IOException
87      * @throws TransformerException
88      * @throws SAXException
89      * @throws URISyntaxException
90      * @throws CodeMappingException
91      */
convert(String[] texPatterns, String outfilePath, boolean useStylesheet, Collection<String> texcodes)92     public static void convert(String[] texPatterns, String outfilePath, boolean useStylesheet,
93                                Collection<String> texcodes)
94     throws IOException, TransformerException, SAXException, URISyntaxException,
95     CodeMappingException {
96         File outDir = new File(outfilePath);
97         boolean oneTexcode = (texcodes != null && texcodes.size() == 1);
98         boolean oneInputfile = (texPatterns.length == 1);
99         boolean oneFilteredInput = (oneTexcode || oneInputfile);
100         if (!oneFilteredInput && !outDir.isDirectory()) {
101             throw new IllegalArgumentException
102             ("with multiple input files the output path " + outfilePath + " must be a directory");
103         }
104         for (String texPattern : texPatterns) {
105             URI texPatternUri = makeTexPatternUri(texPattern);
106             URI[] texPatternUris = makeTexPatternUris(texPatternUri);
107             oneInputfile = (texPatternUris.length == 1);
108             oneFilteredInput = (oneTexcode || oneInputfile);
109             if (!oneFilteredInput && !outDir.isDirectory()) {
110                 throw new IllegalArgumentException
111                 ("with an input directory " + texPattern + " the output path " + outfilePath + " must be a directory");
112             }
113             for (URI t : texPatternUris) {
114                 TransformationData transformationData = makeTransformationData(t, outDir, texcodes);
115                 if (transformationData == null) {
116                     continue;
117                 }
118                 doConvert(t, transformationData, useStylesheet);
119             }
120         }
121     }
122 
123     /**
124      * @param texPattern
125      * @return
126      * @throws URISyntaxException
127      * @throws FileNotFoundException
128      */
makeTexPatternUri(String texPattern)129     private static URI makeTexPatternUri(String texPattern)
130     throws URISyntaxException, FileNotFoundException {
131         URI texPatternUri;
132         texPatternUri = new URI(texPattern);
133         String scheme = texPatternUri.getScheme();
134         // see if it is a relative file path
135         if (scheme == null) {
136             File f = new File(texPattern);
137             texPatternUri = new URI("file", null, f.getAbsolutePath(), null, null);
138             scheme = texPatternUri.getScheme();
139         }
140         if (scheme == null || !(scheme.equals("http") || scheme.equals("file"))) {
141             throw new FileNotFoundException
142             ("URI with file or http scheme required for hyphenation pattern file");
143         }
144         return texPatternUri;
145     }
146 
147     /**
148      * @param outfilePath
149      * @param outDir
150      * @param texPatternUri
151      * @param scheme
152      * @return
153      * @throws URISyntaxException
154      */
makeTexPatternUris(URI texPatternUri)155     private static URI[] makeTexPatternUris(URI texPatternUri) throws URISyntaxException {
156         URI[] texPatternUris;
157         texPatternUris = new URI[] {texPatternUri};
158         String scheme = texPatternUri.getScheme();
159         if (scheme.equals("file")) {
160             File dir = new File(texPatternUri);
161             if (dir.isDirectory()) {
162                 ArrayList<URI> l = new ArrayList<URI>();
163                 FilenameFilter filter = new FilenameFilter() {
164                     public boolean accept(File dir, String name) {
165                         return name.endsWith(".tex");
166                     }
167                 };
168                 for (File f : dir.listFiles(filter)) {
169                     l.add(new URI("file", null, f.getAbsolutePath(), null, null));
170                 }
171                 texPatternUris = l.toArray(texPatternUris);
172             }
173         }
174         return texPatternUris;
175     }
176 
177     /**
178      * @param t
179      * @param outDir
180      * @param texcodes filter of requested tex codes; is allowed to be null
181      * @return
182      * @throws CodeMappingException
183      */
makeTransformationData(URI t, File outDir, Collection<String> texcodes)184     private static TransformationData makeTransformationData
185     (URI t, File outDir, Collection<String> texcodes) throws CodeMappingException {
186         File outFile;
187         String path = t.getPath();
188         String basename = path.substring(path.lastIndexOf('/') + 1);
189         String base = basename.substring(0, basename.lastIndexOf('.'));
190         // xmlCode, texCode
191         String[] codes = mapCode(base);
192         // code mapping lists no xmlCode
193         if (codes[0] == null) {
194             return null;
195         }
196         if (texcodes != null && !texcodes.contains(codes[1])) {
197             return null;
198         }
199         if (!outDir.isDirectory()) {
200             outFile = outDir;
201         } else {
202             outFile = new File(outDir, codes[0] + ".xml");
203         }
204         return new TransformationData(outFile, codes[1]);
205     }
206 
207     private static class TransformationData {
208         File outFile;
209         String texCode;
TransformationData(File outFile, String texCode)210         TransformationData(File outFile, String texCode) {
211             this.outFile = outFile;
212             this.texCode = texCode;
213         }
214     }
215 
216     private static class CodeMappingException extends Exception {
CodeMappingException(Exception e)217         public CodeMappingException(Exception e) {
218             super(e);
219         }
CodeMappingException(String m)220         public CodeMappingException(String m) {
221             super(m);
222         }
223     }
224 
225     static Map<String, String> codeMapping;
226     static CodeMappingException codeMappingException;
227     static {
228         try {
229             codeMapping = readLanguagedata();
230         } catch (ParserConfigurationException e) {
231             codeMappingException = new CodeMappingException(e);
232         } catch (SAXException e) {
233             codeMappingException = new CodeMappingException(e);
234         } catch (IOException e) {
235             codeMappingException = new CodeMappingException(e);
236         }
237     }
238 
mapCode(String texCode)239     private static String[] mapCode(String texCode) throws CodeMappingException {
240         checkCodeMapping();
241         String hyp = "hyph-";
242         String xmlCode = texCode;
243         if (texCode.startsWith(hyp)) {
244             texCode = texCode.substring(hyp.length());
245             xmlCode = codeMapping.get(texCode);
246         }
247         return new String[] {xmlCode, texCode};
248     }
249 
250     /**
251      * @throws CodeMappingException
252      */
checkCodeMapping()253     private static void checkCodeMapping() throws CodeMappingException {
254         if (codeMapping == null) {
255             if (codeMappingException != null) {
256                 throw codeMappingException;
257             } else {
258                 throw new CodeMappingException("Failure initializing code mapping");
259             }
260         }
261     }
262 
readLanguagedata()263     public static Map<String,String> readLanguagedata()
264     throws ParserConfigurationException, SAXException, IOException {
265         SAXParserFactory spf = SAXParserFactory.newInstance();
266         spf.setNamespaceAware(true);
267         SAXParser parser = spf.newSAXParser();
268         InputStream is = ConvertTeXPattern.class.getResourceAsStream("languages.xml");
269         TexcodeReader texcodeReader = new TexcodeReader();
270         parser.parse(is, texcodeReader);
271         return texcodeReader.getTexcodes();
272     }
273 
274     private static class TexcodeReader extends DefaultHandler {
275 
276         private Map<String, String> texcodes = new HashMap<String, String>();
277 
278         /* (non-Javadoc)
279          * @see org.xml.sax.helpers.DefaultHandler#startElement(java.lang.String, java.lang.String, java.lang.String, org.xml.sax.Attributes)
280          */
281         @Override
startElement(String uri, String localName, String qName, Attributes attributes)282         public void startElement(String uri, String localName, String qName,
283                                  Attributes attributes) throws SAXException {
284             if (uri.equals(LanguageDataParser.LANG_NAMESPACE) && localName.equals("language")) {
285                 String texcode = attributes.getValue("code");
286                 String fopcode = attributes.getValue("fop-code");
287                 if (fopcode != null) {
288                     texcodes.put(texcode, fopcode);
289                 }
290             }
291         }
292 
293         /**
294          * @return the texcodes
295          */
getTexcodes()296         public Map<String,String> getTexcodes() {
297             return texcodes;
298         }
299 
300     }
301 
doConvert(URI texPatternUri, TransformationData outdata, boolean useStylesheet)302     public static void doConvert(URI texPatternUri, TransformationData outdata, boolean useStylesheet)
303         throws TransformerException, SAXException, MalformedURLException, IOException, URISyntaxException {
304 
305         String scheme = texPatternUri.getScheme();
306         InputStream inis = null;
307         if (scheme.equals("file")) {
308             File in = new File(texPatternUri);
309             inis = new FileInputStream(in);
310         } else if (scheme.equals("http")) {
311             inis = texPatternUri.toURL().openStream();
312         } else {
313             throw new FileNotFoundException
314             ("URI with file or http scheme required for hyphenation pattern file");
315         }
316 
317         InputSource input = new InputSource(inis);
318         input.setSystemId(texPatternUri.toString());
319         input.setEncoding("utf-8");
320         XMLReader reader = new TeXPatternParser();
321         Result result = new StreamResult(outdata.outFile);
322         TransformerFactory tf = TransformerFactory.newInstance();
323         if (!tf.getFeature(SAXTransformerFactory.FEATURE)) {
324             throw new TransformerException("TransformerFactory is not a SAXTransformerFactory");
325         }
326         SAXTransformerFactory stf = (SAXTransformerFactory) tf;
327         TransformerHandler th;
328         if (useStylesheet) {
329             URL xsltUrl = ConvertTeXPattern.class.getResource("ConvertTeXPattern.xsl");
330             File xsltFile = new File(xsltUrl.toURI());
331             InputStream xsltStream = new FileInputStream(xsltFile);
332             Source xsltSource = new StreamSource(xsltStream);
333             xsltSource.setSystemId(xsltFile.getAbsolutePath());
334             th = stf.newTransformerHandler(xsltSource);
335             Transformer tr = th.getTransformer();
336             tr.setParameter("tex-code", outdata.texCode);
337         } else {
338             th = stf.newTransformerHandler();
339         }
340         reader.setContentHandler(th);
341         reader.setProperty("http://xml.org/sax/properties/lexical-handler", th);
342         th.setResult(result);
343         reader.parse(input);
344     }
345 
346     /**
347      * @param args input URI, output file
348      * @throws URISyntaxException if the URI is not correct
349      * @throws IOException if a file is not found, or contains illegal content
350      * @throws TransformerException
351      * @throws SAXException
352      * @throws ParserConfigurationException
353      * @throws CodeMappingException
354      */
main(String[] args)355     public static void main(String[] args)
356     throws URISyntaxException, IOException, TransformerException, SAXException,
357     ParserConfigurationException, CodeMappingException {
358         String prefix = "--";
359         int i = 0;
360         boolean useStylesheet = true;
361         boolean useLanguagedata = false;
362         Collection<String> texcodes = null;
363         while (args[i].startsWith(prefix)) {
364             String option = args[i].substring(prefix.length());
365             if (option.equals("debug")) {
366                 useStylesheet = false;
367             } else if (option.equals("uselanguagedata") || option.equals("langdata")) {
368                 useLanguagedata = true;
369             } else if (option.equals("texcodes")) {
370                 texcodes = Arrays.asList(args[++i].split(","));
371             } else {
372                 throw new IllegalArgumentException("Unknown option: " + option);
373             }
374             ++i;
375         }
376         if (texcodes != null) {
377             convert(Arrays.copyOfRange(args, i, args.length - 1), args[args.length - 1],
378                     useStylesheet, texcodes);
379         } else {
380             convert(Arrays.copyOfRange(args, i, args.length - 1), args[args.length - 1],
381                     useStylesheet, useLanguagedata);
382         }
383     }
384 
385 }
386