• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
/*
 * Copyright (C) 2013 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
16 
17 package com.android.inputmethod.latin.makedict;
18 
19 import com.android.inputmethod.annotations.UsedForTesting;
20 import com.android.inputmethod.latin.makedict.BinaryDictDecoderUtils.CharEncoding;
21 import com.android.inputmethod.latin.makedict.FormatSpec.FormatOptions;
22 import com.android.inputmethod.latin.makedict.FusionDictionary.PtNode;
23 import com.android.inputmethod.latin.makedict.FusionDictionary.PtNodeArray;
24 
25 import java.io.File;
26 import java.io.FileNotFoundException;
27 import java.io.FileOutputStream;
28 import java.io.IOException;
29 import java.io.OutputStream;
30 import java.util.ArrayList;
31 import java.util.Iterator;
32 
33 /**
34  * An implementation of DictEncoder for version 2 binary dictionary.
35  */
36 @UsedForTesting
37 public class Ver2DictEncoder implements DictEncoder {
38 
39     private final File mDictFile;
40     private OutputStream mOutStream;
41     private byte[] mBuffer;
42     private int mPosition;
43 
44     @UsedForTesting
Ver2DictEncoder(final File dictFile)45     public Ver2DictEncoder(final File dictFile) {
46         mDictFile = dictFile;
47         mOutStream = null;
48         mBuffer = null;
49     }
50 
51     // This constructor is used only by BinaryDictOffdeviceUtilsTests.
52     // If you want to use this in the production code, you should consider keeping consistency of
53     // the interface of Ver3DictDecoder by using factory.
54     @UsedForTesting
Ver2DictEncoder(final OutputStream outStream)55     public Ver2DictEncoder(final OutputStream outStream) {
56         mDictFile = null;
57         mOutStream = outStream;
58     }
59 
openStream()60     private void openStream() throws FileNotFoundException {
61         mOutStream = new FileOutputStream(mDictFile);
62     }
63 
close()64     private void close() throws IOException {
65         if (mOutStream != null) {
66             mOutStream.close();
67             mOutStream = null;
68         }
69     }
70 
71     @Override
writeDictionary(final FusionDictionary dict, final FormatOptions formatOptions)72     public void writeDictionary(final FusionDictionary dict, final FormatOptions formatOptions)
73             throws IOException, UnsupportedFormatException {
74         if (formatOptions.mVersion > FormatSpec.VERSION2) {
75             throw new UnsupportedFormatException(
76                     "The given format options has wrong version number : "
77                     + formatOptions.mVersion);
78         }
79 
80         if (mOutStream == null) {
81             openStream();
82         }
83         BinaryDictEncoderUtils.writeDictionaryHeader(mOutStream, dict, formatOptions);
84 
85         // Addresses are limited to 3 bytes, but since addresses can be relative to each node
86         // array, the structure itself is not limited to 16MB. However, if it is over 16MB deciding
87         // the order of the PtNode arrays becomes a quite complicated problem, because though the
88         // dictionary itself does not have a size limit, each node array must still be within 16MB
89         // of all its children and parents. As long as this is ensured, the dictionary file may
90         // grow to any size.
91 
92         // Leave the choice of the optimal node order to the flattenTree function.
93         MakedictLog.i("Flattening the tree...");
94         ArrayList<PtNodeArray> flatNodes = BinaryDictEncoderUtils.flattenTree(dict.mRootNodeArray);
95 
96         MakedictLog.i("Computing addresses...");
97         BinaryDictEncoderUtils.computeAddresses(dict, flatNodes);
98         MakedictLog.i("Checking PtNode array...");
99         if (MakedictLog.DBG) BinaryDictEncoderUtils.checkFlatPtNodeArrayList(flatNodes);
100 
101         // Create a buffer that matches the final dictionary size.
102         final PtNodeArray lastNodeArray = flatNodes.get(flatNodes.size() - 1);
103         final int bufferSize = lastNodeArray.mCachedAddressAfterUpdate + lastNodeArray.mCachedSize;
104         mBuffer = new byte[bufferSize];
105 
106         MakedictLog.i("Writing file...");
107 
108         for (PtNodeArray nodeArray : flatNodes) {
109             BinaryDictEncoderUtils.writePlacedPtNodeArray(dict, this, nodeArray);
110         }
111         if (MakedictLog.DBG) BinaryDictEncoderUtils.showStatistics(flatNodes);
112         mOutStream.write(mBuffer, 0, mPosition);
113 
114         MakedictLog.i("Done");
115         close();
116     }
117 
118     @Override
setPosition(final int position)119     public void setPosition(final int position) {
120         if (mBuffer == null || position < 0 || position >= mBuffer.length) return;
121         mPosition = position;
122     }
123 
124     @Override
getPosition()125     public int getPosition() {
126         return mPosition;
127     }
128 
129     @Override
writePtNodeCount(final int ptNodeCount)130     public void writePtNodeCount(final int ptNodeCount) {
131         final int countSize = BinaryDictIOUtils.getPtNodeCountSize(ptNodeCount);
132         if (countSize != 1 && countSize != 2) {
133             throw new RuntimeException("Strange size from getGroupCountSize : " + countSize);
134         }
135         final int encodedPtNodeCount = (countSize == 2) ?
136                 (ptNodeCount | FormatSpec.LARGE_PTNODE_ARRAY_SIZE_FIELD_SIZE_FLAG) : ptNodeCount;
137         mPosition = BinaryDictEncoderUtils.writeUIntToBuffer(mBuffer, mPosition, encodedPtNodeCount,
138                 countSize);
139     }
140 
writePtNodeFlags(final PtNode ptNode)141     private void writePtNodeFlags(final PtNode ptNode) {
142         final int childrenPos = BinaryDictEncoderUtils.getChildrenPosition(ptNode);
143         mPosition = BinaryDictEncoderUtils.writeUIntToBuffer(mBuffer, mPosition,
144                 BinaryDictEncoderUtils.makePtNodeFlags(ptNode, childrenPos),
145                 FormatSpec.PTNODE_FLAGS_SIZE);
146     }
147 
writeCharacters(final int[] codePoints, final boolean hasSeveralChars)148     private void writeCharacters(final int[] codePoints, final boolean hasSeveralChars) {
149         mPosition = CharEncoding.writeCharArray(codePoints, mBuffer, mPosition);
150         if (hasSeveralChars) {
151             mBuffer[mPosition++] = FormatSpec.PTNODE_CHARACTERS_TERMINATOR;
152         }
153     }
154 
writeFrequency(final int frequency)155     private void writeFrequency(final int frequency) {
156         if (frequency >= 0) {
157             mPosition = BinaryDictEncoderUtils.writeUIntToBuffer(mBuffer, mPosition, frequency,
158                     FormatSpec.PTNODE_FREQUENCY_SIZE);
159         }
160     }
161 
writeChildrenPosition(final PtNode ptNode)162     private void writeChildrenPosition(final PtNode ptNode) {
163         final int childrenPos = BinaryDictEncoderUtils.getChildrenPosition(ptNode);
164         mPosition += BinaryDictEncoderUtils.writeChildrenPosition(mBuffer, mPosition,
165                 childrenPos);
166     }
167 
168     /**
169      * Write a shortcut attributes list to mBuffer.
170      *
171      * @param shortcuts the shortcut attributes list.
172      */
writeShortcuts(final ArrayList<WeightedString> shortcuts)173     private void writeShortcuts(final ArrayList<WeightedString> shortcuts) {
174         if (null == shortcuts || shortcuts.isEmpty()) return;
175 
176         final int indexOfShortcutByteSize = mPosition;
177         mPosition += FormatSpec.PTNODE_SHORTCUT_LIST_SIZE_SIZE;
178         final Iterator<WeightedString> shortcutIterator = shortcuts.iterator();
179         while (shortcutIterator.hasNext()) {
180             final WeightedString target = shortcutIterator.next();
181             final int shortcutFlags = BinaryDictEncoderUtils.makeShortcutFlags(
182                     shortcutIterator.hasNext(),
183                     target.getProbability());
184             mPosition = BinaryDictEncoderUtils.writeUIntToBuffer(mBuffer, mPosition, shortcutFlags,
185                     FormatSpec.PTNODE_ATTRIBUTE_FLAGS_SIZE);
186             final int shortcutShift = CharEncoding.writeString(mBuffer, mPosition, target.mWord);
187             mPosition += shortcutShift;
188         }
189         final int shortcutByteSize = mPosition - indexOfShortcutByteSize;
190         if (shortcutByteSize > FormatSpec.MAX_SHORTCUT_LIST_SIZE_IN_A_PTNODE) {
191             throw new RuntimeException("Shortcut list too large");
192         }
193         BinaryDictEncoderUtils.writeUIntToBuffer(mBuffer, indexOfShortcutByteSize, shortcutByteSize,
194                 FormatSpec.PTNODE_SHORTCUT_LIST_SIZE_SIZE);
195     }
196 
197     /**
198      * Write a bigram attributes list to mBuffer.
199      *
200      * @param bigrams the bigram attributes list.
201      * @param dict the dictionary the node array is a part of (for relative offsets).
202      */
writeBigrams(final ArrayList<WeightedString> bigrams, final FusionDictionary dict)203     private void writeBigrams(final ArrayList<WeightedString> bigrams,
204             final FusionDictionary dict) {
205         if (bigrams == null) return;
206 
207         final Iterator<WeightedString> bigramIterator = bigrams.iterator();
208         while (bigramIterator.hasNext()) {
209             final WeightedString bigram = bigramIterator.next();
210             final PtNode target =
211                     FusionDictionary.findWordInTree(dict.mRootNodeArray, bigram.mWord);
212             final int addressOfBigram = target.mCachedAddressAfterUpdate;
213             final int unigramFrequencyForThisWord = target.getProbability();
214             final int offset = addressOfBigram
215                     - (mPosition + FormatSpec.PTNODE_ATTRIBUTE_FLAGS_SIZE);
216             final int bigramFlags = BinaryDictEncoderUtils.makeBigramFlags(bigramIterator.hasNext(),
217                     offset, bigram.getProbability(), unigramFrequencyForThisWord, bigram.mWord);
218             mPosition = BinaryDictEncoderUtils.writeUIntToBuffer(mBuffer, mPosition, bigramFlags,
219                     FormatSpec.PTNODE_ATTRIBUTE_FLAGS_SIZE);
220             mPosition += BinaryDictEncoderUtils.writeChildrenPosition(mBuffer, mPosition,
221                     Math.abs(offset));
222         }
223     }
224 
225     @Override
writeForwardLinkAddress(final int forwardLinkAddress)226     public void writeForwardLinkAddress(final int forwardLinkAddress) {
227         mPosition = BinaryDictEncoderUtils.writeUIntToBuffer(mBuffer, mPosition, forwardLinkAddress,
228                 FormatSpec.FORWARD_LINK_ADDRESS_SIZE);
229     }
230 
231     @Override
writePtNode(final PtNode ptNode, final FusionDictionary dict)232     public void writePtNode(final PtNode ptNode, final FusionDictionary dict) {
233         writePtNodeFlags(ptNode);
234         writeCharacters(ptNode.mChars, ptNode.hasSeveralChars());
235         writeFrequency(ptNode.getProbability());
236         writeChildrenPosition(ptNode);
237         writeShortcuts(ptNode.mShortcutTargets);
238         writeBigrams(ptNode.mBigrams, dict);
239     }
240 }
241