1 /* 2 * Copyright (C) 2012 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); you may not 5 * use this file except in compliance with the License. You may obtain a copy of 6 * the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT 12 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the 13 * License for the specific language governing permissions and limitations under 14 * the License. 15 */ 16 17 package com.android.inputmethod.latin.dicttool; 18 19 import com.android.inputmethod.latin.makedict.BinaryDictDecoderUtils; 20 import com.android.inputmethod.latin.makedict.BinaryDictIOUtils; 21 import com.android.inputmethod.latin.makedict.DictDecoder; 22 import com.android.inputmethod.latin.makedict.FusionDictionary; 23 import com.android.inputmethod.latin.makedict.UnsupportedFormatException; 24 25 import org.xml.sax.SAXException; 26 27 import java.io.BufferedInputStream; 28 import java.io.BufferedOutputStream; 29 import java.io.BufferedReader; 30 import java.io.File; 31 import java.io.FileInputStream; 32 import java.io.FileOutputStream; 33 import java.io.IOException; 34 import java.io.InputStream; 35 import java.io.InputStreamReader; 36 import java.io.OutputStream; 37 import java.util.ArrayList; 38 39 import javax.xml.parsers.ParserConfigurationException; 40 41 /** 42 * Class grouping utilities for offline dictionary making. 43 * 44 * Those should not be used on-device, essentially because they are quite 45 * liberal about I/O and performance. 46 */ 47 public final class BinaryDictOffdeviceUtils { 48 // Prefix and suffix are arbitrary, the values do not really matter 49 private final static String PREFIX = "dicttool"; 50 private final static String SUFFIX = ".tmp"; 51 52 public final static String COMPRESSION = "compressed"; 53 public final static String ENCRYPTION = "encrypted"; 54 55 private final static int MAX_DECODE_DEPTH = 8; 56 private final static int COPY_BUFFER_SIZE = 8192; 57 58 public static class DecoderChainSpec { 59 ArrayList<String> mDecoderSpec = new ArrayList<>(); 60 File mFile; 61 addStep(final String stepDescription)62 public DecoderChainSpec addStep(final String stepDescription) { 63 mDecoderSpec.add(stepDescription); 64 return this; 65 } 66 describeChain()67 public String describeChain() { 68 final StringBuilder s = new StringBuilder("raw"); 69 for (final String step : mDecoderSpec) { 70 s.append(" > "); 71 s.append(step); 72 } 73 return s.toString(); 74 } 75 } 76 copy(final InputStream input, final OutputStream output)77 public static void copy(final InputStream input, final OutputStream output) throws IOException { 78 final byte[] buffer = new byte[COPY_BUFFER_SIZE]; 79 for (int readBytes = input.read(buffer); readBytes >= 0; readBytes = input.read(buffer)) { 80 output.write(buffer, 0, readBytes); 81 } 82 } 83 84 /** 85 * Returns a decrypted/uncompressed dictionary. 86 * 87 * This will decrypt/uncompress any number of times as necessary until it finds the 88 * dictionary signature, and copy the decoded file to a temporary place. 89 * If this is not a dictionary, the method returns null. 90 */ getRawDictionaryOrNull(final File src)91 public static DecoderChainSpec getRawDictionaryOrNull(final File src) { 92 return getRawDictionaryOrNullInternal(new DecoderChainSpec(), src, 0); 93 } 94 getRawDictionaryOrNullInternal( final DecoderChainSpec spec, final File src, final int depth)95 private static DecoderChainSpec getRawDictionaryOrNullInternal( 96 final DecoderChainSpec spec, final File src, final int depth) { 97 // Unfortunately the decoding scheme we use can consider any data to be encrypted 98 // and will product some output, meaning it's not possible to reliably detect encrypted 99 // data. Thus, some non-dictionary files (especially small) ones may successfully decrypt 100 // over and over, ending in a stack overflow. Hence we limit the depth at which we try 101 // decoding the file. 102 if (depth > MAX_DECODE_DEPTH) return null; 103 if (BinaryDictDecoderUtils.isBinaryDictionary(src) 104 || CombinedInputOutput.isCombinedDictionary(src.getAbsolutePath())) { 105 spec.mFile = src; 106 return spec; 107 } 108 // It's not a raw dictionary - try to see if it's compressed. 109 final File uncompressedFile = tryGetUncompressedFile(src); 110 if (null != uncompressedFile) { 111 final DecoderChainSpec newSpec = 112 getRawDictionaryOrNullInternal(spec, uncompressedFile, depth + 1); 113 if (null == newSpec) return null; 114 return newSpec.addStep(COMPRESSION); 115 } 116 // It's not a compressed either - try to see if it's crypted. 117 final File decryptedFile = tryGetDecryptedFile(src); 118 if (null != decryptedFile) { 119 final DecoderChainSpec newSpec = 120 getRawDictionaryOrNullInternal(spec, decryptedFile, depth + 1); 121 if (null == newSpec) return null; 122 return newSpec.addStep(ENCRYPTION); 123 } 124 return null; 125 } 126 127 /* Try to uncompress the file passed as an argument. 128 * 129 * If the file can be uncompressed, the uncompressed version is returned. Otherwise, null 130 * is returned. 131 */ tryGetUncompressedFile(final File src)132 private static File tryGetUncompressedFile(final File src) { 133 try { 134 final File dst = File.createTempFile(PREFIX, SUFFIX); 135 dst.deleteOnExit(); 136 try ( 137 final InputStream input = Compress.getUncompressedStream( 138 new BufferedInputStream(new FileInputStream(src))); 139 final OutputStream output = new BufferedOutputStream(new FileOutputStream(dst)) 140 ) { 141 copy(input, output); 142 return dst; 143 } 144 } catch (final IOException e) { 145 // Could not uncompress the file: presumably the file is simply not a compressed file 146 return null; 147 } 148 } 149 150 /* Try to decrypt the file passed as an argument. 151 * 152 * If the file can be decrypted, the decrypted version is returned. Otherwise, null 153 * is returned. 154 */ tryGetDecryptedFile(final File src)155 private static File tryGetDecryptedFile(final File src) { 156 try { 157 final File dst = File.createTempFile(PREFIX, SUFFIX); 158 dst.deleteOnExit(); 159 try ( 160 final InputStream input = Crypt.getDecryptedStream( 161 new BufferedInputStream(new FileInputStream(src))); 162 final OutputStream output = new BufferedOutputStream(new FileOutputStream(dst)) 163 ) { 164 copy(input, output); 165 return dst; 166 } 167 } catch (final IOException e) { 168 // Could not decrypt the file: presumably the file is simply not a crypted file 169 return null; 170 } 171 } 172 getDictionary(final String filename, final boolean report)173 static FusionDictionary getDictionary(final String filename, final boolean report) { 174 final File file = new File(filename); 175 if (report) { 176 System.out.println("Dictionary : " + file.getAbsolutePath()); 177 System.out.println("Size : " + file.length() + " bytes"); 178 } 179 try { 180 if (XmlDictInputOutput.isXmlUnigramDictionary(filename)) { 181 if (report) { 182 System.out.println("Format : XML unigram list"); 183 } 184 return XmlDictInputOutput.readDictionaryXml( 185 new BufferedInputStream(new FileInputStream(file)), 186 null /* shortcuts */, null /* bigrams */); 187 } 188 final DecoderChainSpec decodedSpec = getRawDictionaryOrNull(file); 189 if (null == decodedSpec) { 190 throw new RuntimeException("Does not seem to be a dictionary file " + filename); 191 } 192 if (CombinedInputOutput.isCombinedDictionary(decodedSpec.mFile.getAbsolutePath())) { 193 if (report) { 194 System.out.println("Format : Combined format"); 195 System.out.println("Packaging : " + decodedSpec.describeChain()); 196 System.out.println("Uncompressed size : " + decodedSpec.mFile.length()); 197 } 198 try (final BufferedReader reader = new BufferedReader( 199 new InputStreamReader(new FileInputStream(decodedSpec.mFile), "UTF-8"))) { 200 return CombinedInputOutput.readDictionaryCombined(reader); 201 } 202 } 203 final DictDecoder dictDecoder = BinaryDictIOUtils.getDictDecoder( 204 decodedSpec.mFile, 0, decodedSpec.mFile.length(), 205 DictDecoder.USE_BYTEARRAY); 206 if (report) { 207 System.out.println("Format : Binary dictionary format"); 208 System.out.println("Packaging : " + decodedSpec.describeChain()); 209 System.out.println("Uncompressed size : " + decodedSpec.mFile.length()); 210 } 211 return dictDecoder.readDictionaryBinary(false /* deleteDictIfBroken */); 212 } catch (final IOException | SAXException | ParserConfigurationException | 213 UnsupportedFormatException e) { 214 throw new RuntimeException("Can't read file " + filename, e); 215 } 216 } 217 } 218