• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (C) 2012 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License"); you may not
5  * use this file except in compliance with the License. You may obtain a copy of
6  * the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
12  * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
13  * License for the specific language governing permissions and limitations under
14  * the License.
15  */
16 
17 package com.android.inputmethod.latin.dicttool;
18 
19 import com.android.inputmethod.latin.makedict.BinaryDictDecoderUtils;
20 import com.android.inputmethod.latin.makedict.BinaryDictIOUtils;
21 import com.android.inputmethod.latin.makedict.DictDecoder;
22 import com.android.inputmethod.latin.makedict.FusionDictionary;
23 import com.android.inputmethod.latin.makedict.UnsupportedFormatException;
24 
25 import org.xml.sax.SAXException;
26 
27 import java.io.BufferedInputStream;
28 import java.io.BufferedOutputStream;
29 import java.io.BufferedReader;
30 import java.io.File;
31 import java.io.FileInputStream;
32 import java.io.FileOutputStream;
33 import java.io.IOException;
34 import java.io.InputStream;
35 import java.io.InputStreamReader;
36 import java.io.OutputStream;
37 import java.util.ArrayList;
38 
39 import javax.xml.parsers.ParserConfigurationException;
40 
41 /**
42  * Class grouping utilities for offline dictionary making.
43  *
44  * Those should not be used on-device, essentially because they are quite
45  * liberal about I/O and performance.
46  */
47 public final class BinaryDictOffdeviceUtils {
48     // Prefix and suffix are arbitrary, the values do not really matter
49     private final static String PREFIX = "dicttool";
50     private final static String SUFFIX = ".tmp";
51 
52     public final static String COMPRESSION = "compressed";
53     public final static String ENCRYPTION = "encrypted";
54 
55     private final static int MAX_DECODE_DEPTH = 8;
56     private final static int COPY_BUFFER_SIZE = 8192;
57 
58     public static class DecoderChainSpec {
59         ArrayList<String> mDecoderSpec = new ArrayList<>();
60         File mFile;
61 
addStep(final String stepDescription)62         public DecoderChainSpec addStep(final String stepDescription) {
63             mDecoderSpec.add(stepDescription);
64             return this;
65         }
66 
describeChain()67         public String describeChain() {
68             final StringBuilder s = new StringBuilder("raw");
69             for (final String step : mDecoderSpec) {
70                 s.append(" > ");
71                 s.append(step);
72             }
73             return s.toString();
74         }
75     }
76 
copy(final InputStream input, final OutputStream output)77     public static void copy(final InputStream input, final OutputStream output) throws IOException {
78         final byte[] buffer = new byte[COPY_BUFFER_SIZE];
79         for (int readBytes = input.read(buffer); readBytes >= 0; readBytes = input.read(buffer)) {
80             output.write(buffer, 0, readBytes);
81         }
82     }
83 
84     /**
85      * Returns a decrypted/uncompressed dictionary.
86      *
87      * This will decrypt/uncompress any number of times as necessary until it finds the
88      * dictionary signature, and copy the decoded file to a temporary place.
89      * If this is not a dictionary, the method returns null.
90      */
getRawDictionaryOrNull(final File src)91     public static DecoderChainSpec getRawDictionaryOrNull(final File src) {
92         return getRawDictionaryOrNullInternal(new DecoderChainSpec(), src, 0);
93     }
94 
getRawDictionaryOrNullInternal( final DecoderChainSpec spec, final File src, final int depth)95     private static DecoderChainSpec getRawDictionaryOrNullInternal(
96             final DecoderChainSpec spec, final File src, final int depth) {
97         // Unfortunately the decoding scheme we use can consider any data to be encrypted
98         // and will product some output, meaning it's not possible to reliably detect encrypted
99         // data. Thus, some non-dictionary files (especially small) ones may successfully decrypt
100         // over and over, ending in a stack overflow. Hence we limit the depth at which we try
101         // decoding the file.
102         if (depth > MAX_DECODE_DEPTH) return null;
103         if (BinaryDictDecoderUtils.isBinaryDictionary(src)
104                 || CombinedInputOutput.isCombinedDictionary(src.getAbsolutePath())) {
105             spec.mFile = src;
106             return spec;
107         }
108         // It's not a raw dictionary - try to see if it's compressed.
109         final File uncompressedFile = tryGetUncompressedFile(src);
110         if (null != uncompressedFile) {
111             final DecoderChainSpec newSpec =
112                     getRawDictionaryOrNullInternal(spec, uncompressedFile, depth + 1);
113             if (null == newSpec) return null;
114             return newSpec.addStep(COMPRESSION);
115         }
116         // It's not a compressed either - try to see if it's crypted.
117         final File decryptedFile = tryGetDecryptedFile(src);
118         if (null != decryptedFile) {
119             final DecoderChainSpec newSpec =
120                     getRawDictionaryOrNullInternal(spec, decryptedFile, depth + 1);
121             if (null == newSpec) return null;
122             return newSpec.addStep(ENCRYPTION);
123         }
124         return null;
125     }
126 
127     /* Try to uncompress the file passed as an argument.
128      *
129      * If the file can be uncompressed, the uncompressed version is returned. Otherwise, null
130      * is returned.
131      */
tryGetUncompressedFile(final File src)132     private static File tryGetUncompressedFile(final File src) {
133         try {
134             final File dst = File.createTempFile(PREFIX, SUFFIX);
135             dst.deleteOnExit();
136             try (
137                 final InputStream input = Compress.getUncompressedStream(
138                         new BufferedInputStream(new FileInputStream(src)));
139                 final OutputStream output = new BufferedOutputStream(new FileOutputStream(dst))
140             ) {
141                 copy(input, output);
142                 return dst;
143             }
144         } catch (final IOException e) {
145             // Could not uncompress the file: presumably the file is simply not a compressed file
146             return null;
147         }
148     }
149 
150     /* Try to decrypt the file passed as an argument.
151      *
152      * If the file can be decrypted, the decrypted version is returned. Otherwise, null
153      * is returned.
154      */
tryGetDecryptedFile(final File src)155     private static File tryGetDecryptedFile(final File src) {
156         try {
157             final File dst = File.createTempFile(PREFIX, SUFFIX);
158             dst.deleteOnExit();
159             try (
160                 final InputStream input = Crypt.getDecryptedStream(
161                         new BufferedInputStream(new FileInputStream(src)));
162                 final OutputStream output = new BufferedOutputStream(new FileOutputStream(dst))
163             ) {
164                 copy(input, output);
165                 return dst;
166             }
167         } catch (final IOException e) {
168             // Could not decrypt the file: presumably the file is simply not a crypted file
169             return null;
170         }
171     }
172 
getDictionary(final String filename, final boolean report)173     static FusionDictionary getDictionary(final String filename, final boolean report) {
174         final File file = new File(filename);
175         if (report) {
176             System.out.println("Dictionary : " + file.getAbsolutePath());
177             System.out.println("Size : " + file.length() + " bytes");
178         }
179         try {
180             if (XmlDictInputOutput.isXmlUnigramDictionary(filename)) {
181                 if (report) {
182                     System.out.println("Format : XML unigram list");
183                 }
184                 return XmlDictInputOutput.readDictionaryXml(
185                         new BufferedInputStream(new FileInputStream(file)),
186                         null /* shortcuts */, null /* bigrams */);
187             }
188             final DecoderChainSpec decodedSpec = getRawDictionaryOrNull(file);
189             if (null == decodedSpec) {
190                 throw new RuntimeException("Does not seem to be a dictionary file " + filename);
191             }
192             if (CombinedInputOutput.isCombinedDictionary(decodedSpec.mFile.getAbsolutePath())) {
193                 if (report) {
194                     System.out.println("Format : Combined format");
195                     System.out.println("Packaging : " + decodedSpec.describeChain());
196                     System.out.println("Uncompressed size : " + decodedSpec.mFile.length());
197                 }
198                 try (final BufferedReader reader = new BufferedReader(
199                         new InputStreamReader(new FileInputStream(decodedSpec.mFile), "UTF-8"))) {
200                     return CombinedInputOutput.readDictionaryCombined(reader);
201                 }
202             }
203             final DictDecoder dictDecoder = BinaryDictIOUtils.getDictDecoder(
204                     decodedSpec.mFile, 0, decodedSpec.mFile.length(),
205                     DictDecoder.USE_BYTEARRAY);
206             if (report) {
207                 System.out.println("Format : Binary dictionary format");
208                 System.out.println("Packaging : " + decodedSpec.describeChain());
209                 System.out.println("Uncompressed size : " + decodedSpec.mFile.length());
210             }
211             return dictDecoder.readDictionaryBinary(false /* deleteDictIfBroken */);
212         } catch (final IOException | SAXException | ParserConfigurationException |
213                 UnsupportedFormatException e) {
214             throw new RuntimeException("Can't read file " + filename, e);
215         }
216     }
217 }
218