1 /* 2 * Copyright Simon Pepping 2009 3 * 4 * The copyright owner licenses this file to You under the Apache License, Version 2.0 5 * (the "License"); you may not use this file except in compliance with 6 * the License. You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 /* $Id$ */ 18 19 package org.tug.texhyphen; 20 21 import java.io.File; 22 import java.io.FileInputStream; 23 import java.io.FileNotFoundException; 24 import java.io.FilenameFilter; 25 import java.io.IOException; 26 import java.io.InputStream; 27 import java.net.MalformedURLException; 28 import java.net.URI; 29 import java.net.URISyntaxException; 30 import java.net.URL; 31 import java.util.ArrayList; 32 import java.util.Arrays; 33 import java.util.Collection; 34 import java.util.HashMap; 35 import java.util.Map; 36 37 import javax.xml.parsers.ParserConfigurationException; 38 import javax.xml.parsers.SAXParser; 39 import javax.xml.parsers.SAXParserFactory; 40 import javax.xml.transform.Result; 41 import javax.xml.transform.Source; 42 import javax.xml.transform.Transformer; 43 import javax.xml.transform.TransformerException; 44 import javax.xml.transform.TransformerFactory; 45 import javax.xml.transform.sax.SAXTransformerFactory; 46 import javax.xml.transform.sax.TransformerHandler; 47 import javax.xml.transform.stream.StreamResult; 48 import javax.xml.transform.stream.StreamSource; 49 50 import org.xml.sax.Attributes; 51 import org.xml.sax.InputSource; 52 import org.xml.sax.SAXException; 53 import org.xml.sax.XMLReader; 54 import org.xml.sax.helpers.DefaultHandler; 55 56 /** 57 * Convert modern UTF8 TeX hyphenation patterns to XML format 58 */ 59 public final class ConvertTeXPattern { 60 convert(String[] texPatterns, String outfilePath, boolean useStylesheet, boolean useLanguagedata)61 public static void convert(String[] texPatterns, String outfilePath, boolean useStylesheet, 62 boolean useLanguagedata) 63 throws IOException, TransformerException, SAXException, URISyntaxException, 64 ParserConfigurationException, CodeMappingException { 65 checkCodeMapping(); 66 Collection<String> languages = codeMapping.keySet(); 67 convert(texPatterns, outfilePath, useStylesheet, languages); 68 } 69 convert(String[] texPatterns, String outfilePath, boolean useStylesheet)70 public static void convert(String[] texPatterns, String outfilePath, boolean useStylesheet) 71 throws IOException, TransformerException, SAXException, URISyntaxException, 72 CodeMappingException { 73 convert(texPatterns, outfilePath, useStylesheet, null); 74 } 75 76 /** 77 * infile outfile 78 * indir outdir (file protocol only) 79 * infiles outdir 80 * file and http protocols allowed 81 * 82 * @param texPatternUri 83 * @param outfilePath 84 * @param useStylesheet 85 * @param texcodes filter of requested tex codes; is allowed to be null 86 * @throws IOException 87 * @throws TransformerException 88 * @throws SAXException 89 * @throws URISyntaxException 90 * @throws CodeMappingException 91 */ convert(String[] texPatterns, String outfilePath, boolean useStylesheet, Collection<String> texcodes)92 public static void convert(String[] texPatterns, String outfilePath, boolean useStylesheet, 93 Collection<String> texcodes) 94 throws IOException, TransformerException, SAXException, URISyntaxException, 95 CodeMappingException { 96 File outDir = new File(outfilePath); 97 boolean oneTexcode = (texcodes != null && texcodes.size() == 1); 98 boolean oneInputfile = (texPatterns.length == 1); 99 boolean oneFilteredInput = (oneTexcode || oneInputfile); 100 if (!oneFilteredInput && !outDir.isDirectory()) { 101 throw new IllegalArgumentException 102 ("with multiple input files the output path " + outfilePath + " must be a directory"); 103 } 104 for (String texPattern : texPatterns) { 105 URI texPatternUri = makeTexPatternUri(texPattern); 106 URI[] texPatternUris = makeTexPatternUris(texPatternUri); 107 oneInputfile = (texPatternUris.length == 1); 108 oneFilteredInput = (oneTexcode || oneInputfile); 109 if (!oneFilteredInput && !outDir.isDirectory()) { 110 throw new IllegalArgumentException 111 ("with an input directory " + texPattern + " the output path " + outfilePath + " must be a directory"); 112 } 113 for (URI t : texPatternUris) { 114 TransformationData transformationData = makeTransformationData(t, outDir, texcodes); 115 if (transformationData == null) { 116 continue; 117 } 118 doConvert(t, transformationData, useStylesheet); 119 } 120 } 121 } 122 123 /** 124 * @param texPattern 125 * @return 126 * @throws URISyntaxException 127 * @throws FileNotFoundException 128 */ makeTexPatternUri(String texPattern)129 private static URI makeTexPatternUri(String texPattern) 130 throws URISyntaxException, FileNotFoundException { 131 URI texPatternUri; 132 texPatternUri = new URI(texPattern); 133 String scheme = texPatternUri.getScheme(); 134 // see if it is a relative file path 135 if (scheme == null) { 136 File f = new File(texPattern); 137 texPatternUri = new URI("file", null, f.getAbsolutePath(), null, null); 138 scheme = texPatternUri.getScheme(); 139 } 140 if (scheme == null || !(scheme.equals("http") || scheme.equals("file"))) { 141 throw new FileNotFoundException 142 ("URI with file or http scheme required for hyphenation pattern file"); 143 } 144 return texPatternUri; 145 } 146 147 /** 148 * @param outfilePath 149 * @param outDir 150 * @param texPatternUri 151 * @param scheme 152 * @return 153 * @throws URISyntaxException 154 */ makeTexPatternUris(URI texPatternUri)155 private static URI[] makeTexPatternUris(URI texPatternUri) throws URISyntaxException { 156 URI[] texPatternUris; 157 texPatternUris = new URI[] {texPatternUri}; 158 String scheme = texPatternUri.getScheme(); 159 if (scheme.equals("file")) { 160 File dir = new File(texPatternUri); 161 if (dir.isDirectory()) { 162 ArrayList<URI> l = new ArrayList<URI>(); 163 FilenameFilter filter = new FilenameFilter() { 164 public boolean accept(File dir, String name) { 165 return name.endsWith(".tex"); 166 } 167 }; 168 for (File f : dir.listFiles(filter)) { 169 l.add(new URI("file", null, f.getAbsolutePath(), null, null)); 170 } 171 texPatternUris = l.toArray(texPatternUris); 172 } 173 } 174 return texPatternUris; 175 } 176 177 /** 178 * @param t 179 * @param outDir 180 * @param texcodes filter of requested tex codes; is allowed to be null 181 * @return 182 * @throws CodeMappingException 183 */ makeTransformationData(URI t, File outDir, Collection<String> texcodes)184 private static TransformationData makeTransformationData 185 (URI t, File outDir, Collection<String> texcodes) throws CodeMappingException { 186 File outFile; 187 String path = t.getPath(); 188 String basename = path.substring(path.lastIndexOf('/') + 1); 189 String base = basename.substring(0, basename.lastIndexOf('.')); 190 // xmlCode, texCode 191 String[] codes = mapCode(base); 192 // code mapping lists no xmlCode 193 if (codes[0] == null) { 194 return null; 195 } 196 if (texcodes != null && !texcodes.contains(codes[1])) { 197 return null; 198 } 199 if (!outDir.isDirectory()) { 200 outFile = outDir; 201 } else { 202 outFile = new File(outDir, codes[0] + ".xml"); 203 } 204 return new TransformationData(outFile, codes[1]); 205 } 206 207 private static class TransformationData { 208 File outFile; 209 String texCode; TransformationData(File outFile, String texCode)210 TransformationData(File outFile, String texCode) { 211 this.outFile = outFile; 212 this.texCode = texCode; 213 } 214 } 215 216 private static class CodeMappingException extends Exception { CodeMappingException(Exception e)217 public CodeMappingException(Exception e) { 218 super(e); 219 } CodeMappingException(String m)220 public CodeMappingException(String m) { 221 super(m); 222 } 223 } 224 225 static Map<String, String> codeMapping; 226 static CodeMappingException codeMappingException; 227 static { 228 try { 229 codeMapping = readLanguagedata(); 230 } catch (ParserConfigurationException e) { 231 codeMappingException = new CodeMappingException(e); 232 } catch (SAXException e) { 233 codeMappingException = new CodeMappingException(e); 234 } catch (IOException e) { 235 codeMappingException = new CodeMappingException(e); 236 } 237 } 238 mapCode(String texCode)239 private static String[] mapCode(String texCode) throws CodeMappingException { 240 checkCodeMapping(); 241 String hyp = "hyph-"; 242 String xmlCode = texCode; 243 if (texCode.startsWith(hyp)) { 244 texCode = texCode.substring(hyp.length()); 245 xmlCode = codeMapping.get(texCode); 246 } 247 return new String[] {xmlCode, texCode}; 248 } 249 250 /** 251 * @throws CodeMappingException 252 */ checkCodeMapping()253 private static void checkCodeMapping() throws CodeMappingException { 254 if (codeMapping == null) { 255 if (codeMappingException != null) { 256 throw codeMappingException; 257 } else { 258 throw new CodeMappingException("Failure initializing code mapping"); 259 } 260 } 261 } 262 readLanguagedata()263 public static Map<String,String> readLanguagedata() 264 throws ParserConfigurationException, SAXException, IOException { 265 SAXParserFactory spf = SAXParserFactory.newInstance(); 266 spf.setNamespaceAware(true); 267 SAXParser parser = spf.newSAXParser(); 268 InputStream is = ConvertTeXPattern.class.getResourceAsStream("languages.xml"); 269 TexcodeReader texcodeReader = new TexcodeReader(); 270 parser.parse(is, texcodeReader); 271 return texcodeReader.getTexcodes(); 272 } 273 274 private static class TexcodeReader extends DefaultHandler { 275 276 private Map<String, String> texcodes = new HashMap<String, String>(); 277 278 /* (non-Javadoc) 279 * @see org.xml.sax.helpers.DefaultHandler#startElement(java.lang.String, java.lang.String, java.lang.String, org.xml.sax.Attributes) 280 */ 281 @Override startElement(String uri, String localName, String qName, Attributes attributes)282 public void startElement(String uri, String localName, String qName, 283 Attributes attributes) throws SAXException { 284 if (uri.equals(LanguageDataParser.LANG_NAMESPACE) && localName.equals("language")) { 285 String texcode = attributes.getValue("code"); 286 String fopcode = attributes.getValue("fop-code"); 287 if (fopcode != null) { 288 texcodes.put(texcode, fopcode); 289 } 290 } 291 } 292 293 /** 294 * @return the texcodes 295 */ getTexcodes()296 public Map<String,String> getTexcodes() { 297 return texcodes; 298 } 299 300 } 301 doConvert(URI texPatternUri, TransformationData outdata, boolean useStylesheet)302 public static void doConvert(URI texPatternUri, TransformationData outdata, boolean useStylesheet) 303 throws TransformerException, SAXException, MalformedURLException, IOException, URISyntaxException { 304 305 String scheme = texPatternUri.getScheme(); 306 InputStream inis = null; 307 if (scheme.equals("file")) { 308 File in = new File(texPatternUri); 309 inis = new FileInputStream(in); 310 } else if (scheme.equals("http")) { 311 inis = texPatternUri.toURL().openStream(); 312 } else { 313 throw new FileNotFoundException 314 ("URI with file or http scheme required for hyphenation pattern file"); 315 } 316 317 InputSource input = new InputSource(inis); 318 input.setSystemId(texPatternUri.toString()); 319 input.setEncoding("utf-8"); 320 XMLReader reader = new TeXPatternParser(); 321 Result result = new StreamResult(outdata.outFile); 322 TransformerFactory tf = TransformerFactory.newInstance(); 323 if (!tf.getFeature(SAXTransformerFactory.FEATURE)) { 324 throw new TransformerException("TransformerFactory is not a SAXTransformerFactory"); 325 } 326 SAXTransformerFactory stf = (SAXTransformerFactory) tf; 327 TransformerHandler th; 328 if (useStylesheet) { 329 URL xsltUrl = ConvertTeXPattern.class.getResource("ConvertTeXPattern.xsl"); 330 File xsltFile = new File(xsltUrl.toURI()); 331 InputStream xsltStream = new FileInputStream(xsltFile); 332 Source xsltSource = new StreamSource(xsltStream); 333 xsltSource.setSystemId(xsltFile.getAbsolutePath()); 334 th = stf.newTransformerHandler(xsltSource); 335 Transformer tr = th.getTransformer(); 336 tr.setParameter("tex-code", outdata.texCode); 337 } else { 338 th = stf.newTransformerHandler(); 339 } 340 reader.setContentHandler(th); 341 reader.setProperty("http://xml.org/sax/properties/lexical-handler", th); 342 th.setResult(result); 343 reader.parse(input); 344 } 345 346 /** 347 * @param args input URI, output file 348 * @throws URISyntaxException if the URI is not correct 349 * @throws IOException if a file is not found, or contains illegal content 350 * @throws TransformerException 351 * @throws SAXException 352 * @throws ParserConfigurationException 353 * @throws CodeMappingException 354 */ main(String[] args)355 public static void main(String[] args) 356 throws URISyntaxException, IOException, TransformerException, SAXException, 357 ParserConfigurationException, CodeMappingException { 358 String prefix = "--"; 359 int i = 0; 360 boolean useStylesheet = true; 361 boolean useLanguagedata = false; 362 Collection<String> texcodes = null; 363 while (args[i].startsWith(prefix)) { 364 String option = args[i].substring(prefix.length()); 365 if (option.equals("debug")) { 366 useStylesheet = false; 367 } else if (option.equals("uselanguagedata") || option.equals("langdata")) { 368 useLanguagedata = true; 369 } else if (option.equals("texcodes")) { 370 texcodes = Arrays.asList(args[++i].split(",")); 371 } else { 372 throw new IllegalArgumentException("Unknown option: " + option); 373 } 374 ++i; 375 } 376 if (texcodes != null) { 377 convert(Arrays.copyOfRange(args, i, args.length - 1), args[args.length - 1], 378 useStylesheet, texcodes); 379 } else { 380 convert(Arrays.copyOfRange(args, i, args.length - 1), args[args.length - 1], 381 useStylesheet, useLanguagedata); 382 } 383 } 384 385 } 386