1 /* 2 * Copyright (C) 2012 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); you may not 5 * use this file except in compliance with the License. You may obtain a copy of 6 * the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT 12 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the 13 * License for the specific language governing permissions and limitations under 14 * the License. 15 */ 16 17 package com.android.inputmethod.latin.dicttool; 18 19 import com.android.inputmethod.latin.makedict.BinaryDictDecoderUtils; 20 import com.android.inputmethod.latin.makedict.BinaryDictIOUtils; 21 import com.android.inputmethod.latin.makedict.DictDecoder; 22 import com.android.inputmethod.latin.makedict.DictionaryHeader; 23 import com.android.inputmethod.latin.makedict.FormatSpec; 24 import com.android.inputmethod.latin.makedict.FormatSpec.DictionaryOptions; 25 import com.android.inputmethod.latin.makedict.FormatSpec.FormatOptions; 26 import com.android.inputmethod.latin.makedict.FusionDictionary; 27 import com.android.inputmethod.latin.makedict.UnsupportedFormatException; 28 29 import java.io.BufferedInputStream; 30 import java.io.BufferedOutputStream; 31 import java.io.BufferedReader; 32 import java.io.File; 33 import java.io.FileInputStream; 34 import java.io.FileNotFoundException; 35 import java.io.FileOutputStream; 36 import java.io.IOException; 37 import java.io.InputStream; 38 import java.io.InputStreamReader; 39 import java.io.OutputStream; 40 import java.util.HashMap; 41 42 import javax.annotation.Nonnull; 43 import javax.annotation.Nullable; 44 45 /** 46 * Class grouping utilities for offline dictionary making. 47 * 48 * Those should not be used on-device, essentially because they are quite 49 * liberal about I/O and performance. 50 */ 51 public final class BinaryDictOffdeviceUtils { 52 // Prefix and suffix are arbitrary, the values do not really matter 53 private final static String PREFIX = "dicttool"; 54 private final static String SUFFIX = ".tmp"; 55 private final static int COPY_BUFFER_SIZE = 8192; 56 57 public static class DecoderChainSpec<T> { 58 public final static int COMPRESSION = 1; 59 public final static int ENCRYPTION = 2; 60 61 private final static int[][] VALID_DECODER_CHAINS = { 62 { }, { COMPRESSION }, { ENCRYPTION, COMPRESSION } 63 }; 64 65 private final int mDecoderSpecIndex; 66 public T mResult; 67 DecoderChainSpec()68 public DecoderChainSpec() { 69 mDecoderSpecIndex = 0; 70 mResult = null; 71 } 72 DecoderChainSpec(final DecoderChainSpec<T> src)73 private DecoderChainSpec(final DecoderChainSpec<T> src) { 74 mDecoderSpecIndex = src.mDecoderSpecIndex + 1; 75 mResult = src.mResult; 76 } 77 getStepDescription(final int step)78 private String getStepDescription(final int step) { 79 switch (step) { 80 case COMPRESSION: 81 return "compression"; 82 case ENCRYPTION: 83 return "encryption"; 84 default: 85 return "unknown"; 86 } 87 } 88 describeChain()89 public String describeChain() { 90 final StringBuilder s = new StringBuilder("raw"); 91 for (final int step : VALID_DECODER_CHAINS[mDecoderSpecIndex]) { 92 s.append(" > "); 93 s.append(getStepDescription(step)); 94 } 95 return s.toString(); 96 } 97 98 /** 99 * Returns the next sequential spec. If exhausted, return null. 100 */ next()101 public DecoderChainSpec next() { 102 if (mDecoderSpecIndex + 1 >= VALID_DECODER_CHAINS.length) { 103 return null; 104 } 105 return new DecoderChainSpec(this); 106 } 107 getStream(final File src)108 public InputStream getStream(final File src) throws FileNotFoundException, IOException { 109 InputStream input = new BufferedInputStream(new FileInputStream(src)); 110 for (final int step : VALID_DECODER_CHAINS[mDecoderSpecIndex]) { 111 switch (step) { 112 case COMPRESSION: 113 input = Compress.getUncompressedStream(input); 114 break; 115 case ENCRYPTION: 116 input = Crypt.getDecryptedStream(input); 117 break; 118 } 119 } 120 return input; 121 } 122 } 123 124 public interface InputProcessor<T> { 125 @Nonnull process(@onnull final InputStream input)126 public T process(@Nonnull final InputStream input) 127 throws IOException, UnsupportedFormatException; 128 } 129 130 public static class CopyProcessor implements InputProcessor<File> { 131 @Override @Nonnull process(@onnull final InputStream input)132 public File process(@Nonnull final InputStream input) throws IOException, 133 UnsupportedFormatException { 134 final File dst = File.createTempFile(PREFIX, SUFFIX); 135 dst.deleteOnExit(); 136 try (final OutputStream output = new BufferedOutputStream(new FileOutputStream(dst))) { 137 copy(input, output); 138 output.flush(); 139 output.close(); 140 if (BinaryDictDecoderUtils.isBinaryDictionary(dst) 141 || CombinedInputOutput.isCombinedDictionary(dst.getAbsolutePath())) { 142 return dst; 143 } 144 } 145 throw new UnsupportedFormatException("Input stream not at the expected format"); 146 } 147 } 148 149 public static class HeaderReaderProcessor implements InputProcessor<DictionaryHeader> { 150 // Arbitrarily limit the header length to 32k. Sounds like it would never be larger 151 // than this. Revisit this if needed later. 152 private final int MAX_HEADER_LENGTH = 32 * 1024; 153 @Override @Nonnull process(final InputStream input)154 public DictionaryHeader process(final InputStream input) throws IOException, 155 UnsupportedFormatException { 156 // Do everything as curtly and ad-hoc as possible for performance. 157 final byte[] tmpBuffer = new byte[12]; 158 if (tmpBuffer.length != input.read(tmpBuffer)) { 159 throw new UnsupportedFormatException("File too short, not a dictionary"); 160 } 161 // Ad-hoc check for the magic number. See FormatSpec.java as well as 162 // byte_array_utils.h and BinaryDictEncoderUtils#writeDictionaryHeader(). 163 final int MAGIC_NUMBER_START_OFFSET = 0; 164 final int VERSION_START_OFFSET = 4; 165 final int HEADER_SIZE_OFFSET = 8; 166 final int magicNumber = ((tmpBuffer[MAGIC_NUMBER_START_OFFSET] & 0xFF) << 24) 167 + ((tmpBuffer[MAGIC_NUMBER_START_OFFSET + 1] & 0xFF) << 16) 168 + ((tmpBuffer[MAGIC_NUMBER_START_OFFSET + 2] & 0xFF) << 8) 169 + (tmpBuffer[MAGIC_NUMBER_START_OFFSET + 3] & 0xFF); 170 if (magicNumber != FormatSpec.MAGIC_NUMBER) { 171 throw new UnsupportedFormatException("Wrong magic number"); 172 } 173 final int version = ((tmpBuffer[VERSION_START_OFFSET] & 0xFF) << 8) 174 + (tmpBuffer[VERSION_START_OFFSET + 1] & 0xFF); 175 if (version != FormatSpec.VERSION2 && version != FormatSpec.VERSION201 176 && version != FormatSpec.VERSION202) { 177 throw new UnsupportedFormatException("Only versions 2, 201, 202 are supported"); 178 } 179 final int totalHeaderSize = ((tmpBuffer[HEADER_SIZE_OFFSET] & 0xFF) << 24) 180 + ((tmpBuffer[HEADER_SIZE_OFFSET + 1] & 0xFF) << 16) 181 + ((tmpBuffer[HEADER_SIZE_OFFSET + 2] & 0xFF) << 8) 182 + (tmpBuffer[HEADER_SIZE_OFFSET + 3] & 0xFF); 183 if (totalHeaderSize > MAX_HEADER_LENGTH) { 184 throw new UnsupportedFormatException("Header too large"); 185 } 186 final byte[] headerBuffer = new byte[totalHeaderSize - tmpBuffer.length]; 187 readStreamExhaustively(input, headerBuffer); 188 final HashMap<String, String> attributes = 189 BinaryDictDecoderUtils.decodeHeaderAttributes(headerBuffer); 190 return new DictionaryHeader(totalHeaderSize, new DictionaryOptions(attributes), 191 new FormatOptions(version, false /* hasTimestamp */)); 192 } 193 } 194 readStreamExhaustively(final InputStream inputStream, final byte[] outBuffer)195 private static void readStreamExhaustively(final InputStream inputStream, 196 final byte[] outBuffer) throws IOException, UnsupportedFormatException { 197 int readBytes = 0; 198 int readBytesLastCycle = -1; 199 while (readBytes != outBuffer.length) { 200 readBytesLastCycle = inputStream.read(outBuffer, readBytes, 201 outBuffer.length - readBytes); 202 if (readBytesLastCycle == -1) 203 throw new UnsupportedFormatException("File shorter than specified in the header" 204 + " (expected " + outBuffer.length + ", read " + readBytes + ")"); 205 readBytes += readBytesLastCycle; 206 } 207 } 208 copy(final InputStream input, final OutputStream output)209 public static void copy(final InputStream input, final OutputStream output) throws IOException { 210 final byte[] buffer = new byte[COPY_BUFFER_SIZE]; 211 for (int readBytes = input.read(buffer); readBytes >= 0; readBytes = input.read(buffer)) { 212 output.write(buffer, 0, readBytes); 213 } 214 } 215 216 /** 217 * Process a dictionary, decrypting/uncompressing it on the fly as necessary. 218 * 219 * This will execute the given processor repeatedly with the possible alternatives 220 * for dictionary format until the processor does not throw an exception. 221 * If the processor succeeds for none of the possible formats, the method returns null. 222 */ 223 @Nullable decodeDictionaryForProcess(@onnull final File src, @Nonnull final InputProcessor<T> processor)224 public static <T> DecoderChainSpec<T> decodeDictionaryForProcess(@Nonnull final File src, 225 @Nonnull final InputProcessor<T> processor) { 226 @Nonnull DecoderChainSpec spec = new DecoderChainSpec(); 227 while (null != spec) { 228 try { 229 final InputStream input = spec.getStream(src); 230 spec.mResult = processor.process(input); 231 try { 232 input.close(); 233 } catch (IOException e) { 234 // CipherInputStream doesn't like being closed without having read the 235 // entire stream, for some reason. But we don't want to because it's a waste 236 // of resources. We really, really don't care about this. 237 // However on close() CipherInputStream does throw this exception, wrapped 238 // in an IOException so we need to catch it. 239 if (!(e.getCause() instanceof javax.crypto.BadPaddingException)) { 240 throw e; 241 } 242 } 243 return spec; 244 } catch (IOException | UnsupportedFormatException | ArrayIndexOutOfBoundsException e) { 245 // If the format is not the right one for this file, the processor will throw one 246 // of these exceptions. In our case, that means we should try the next spec, 247 // since it may still be at another format we haven't tried yet. 248 // TODO: stop using exceptions for this non-exceptional case. 249 } 250 spec = spec.next(); 251 } 252 return null; 253 } 254 255 /** 256 * Get a decoder chain spec with a raw dictionary file. This makes a new file on the 257 * disk ready for any treatment the client wants. 258 */ 259 @Nullable getRawDictionaryOrNull(@onnull final File src)260 public static DecoderChainSpec<File> getRawDictionaryOrNull(@Nonnull final File src) { 261 return decodeDictionaryForProcess(src, new CopyProcessor()); 262 } 263 getDictionary(final String filename, final boolean report)264 static FusionDictionary getDictionary(final String filename, final boolean report) { 265 final File file = new File(filename); 266 if (report) { 267 System.out.println("Dictionary : " + file.getAbsolutePath()); 268 System.out.println("Size : " + file.length() + " bytes"); 269 } 270 try { 271 final DecoderChainSpec<File> decodedSpec = getRawDictionaryOrNull(file); 272 if (null == decodedSpec) { 273 throw new RuntimeException("Does not seem to be a dictionary file " + filename); 274 } 275 if (CombinedInputOutput.isCombinedDictionary(decodedSpec.mResult.getAbsolutePath())) { 276 if (report) { 277 System.out.println("Format : Combined format"); 278 System.out.println("Packaging : " + decodedSpec.describeChain()); 279 System.out.println("Uncompressed size : " + decodedSpec.mResult.length()); 280 } 281 try (final BufferedReader reader = new BufferedReader( 282 new InputStreamReader(new FileInputStream(decodedSpec.mResult), "UTF-8"))) { 283 return CombinedInputOutput.readDictionaryCombined(reader); 284 } 285 } 286 final DictDecoder dictDecoder = BinaryDictIOUtils.getDictDecoder( 287 decodedSpec.mResult, 0, decodedSpec.mResult.length(), 288 DictDecoder.USE_BYTEARRAY); 289 if (report) { 290 System.out.println("Format : Binary dictionary format"); 291 System.out.println("Packaging : " + decodedSpec.describeChain()); 292 System.out.println("Uncompressed size : " + decodedSpec.mResult.length()); 293 } 294 return dictDecoder.readDictionaryBinary(false /* deleteDictIfBroken */); 295 } catch (final IOException | UnsupportedFormatException e) { 296 throw new RuntimeException("Can't read file " + filename, e); 297 } 298 } 299 } 300