• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html#License
3 /*
4  *******************************************************************************
5  * Copyright (C) 2012-2016, International Business Machines Corporation and
6  * others. All Rights Reserved.
7  *******************************************************************************
8  */
9 
10 package com.ibm.icu.text;
11 
12 import java.io.IOException;
13 import java.nio.ByteBuffer;
14 
15 import com.ibm.icu.impl.Assert;
16 import com.ibm.icu.impl.ICUBinary;
17 import com.ibm.icu.impl.ICUData;
18 import com.ibm.icu.impl.ICUResourceBundle;
19 import com.ibm.icu.util.UResourceBundle;
20 
21 final class DictionaryData {
22     // disallow instantiation
DictionaryData()23     private DictionaryData() { }
24 
25     public static final int TRIE_TYPE_BYTES = 0;
26     public static final int TRIE_TYPE_UCHARS = 1;
27     public static final int TRIE_TYPE_MASK = 7;
28     public static final int TRIE_HAS_VALUES = 8;
29     public static final int TRANSFORM_NONE = 0;
30     public static final int TRANSFORM_TYPE_OFFSET = 0x1000000;
31     public static final int TRANSFORM_TYPE_MASK = 0x7f000000;
32     public static final int TRANSFORM_OFFSET_MASK = 0x1fffff;
33 
34     public static final int IX_STRING_TRIE_OFFSET = 0;
35     public static final int IX_RESERVED1_OFFSET = 1;
36     public static final int IX_RESERVED2_OFFSET = 2;
37     public static final int IX_TOTAL_SIZE = 3;
38     public static final int IX_TRIE_TYPE = 4;
39     public static final int IX_TRANSFORM = 5;
40     public static final int IX_RESERVED6 = 6;
41     public static final int IX_RESERVED7 = 7;
42     public static final int IX_COUNT = 8;
43 
44     private static final int DATA_FORMAT_ID = 0x44696374;
45 
loadDictionaryFor(String dictType)46     public static DictionaryMatcher loadDictionaryFor(String dictType) throws IOException {
47         ICUResourceBundle rb = (ICUResourceBundle)UResourceBundle.getBundleInstance(ICUData.ICU_BRKITR_BASE_NAME);
48         String dictFileName = rb.getStringWithFallback("dictionaries/" + dictType);
49         dictFileName = ICUData.ICU_BRKITR_NAME + '/' + dictFileName;
50         ByteBuffer bytes = ICUBinary.getRequiredData(dictFileName);
51         ICUBinary.readHeader(bytes, DATA_FORMAT_ID, null);
52         int[] indexes = new int[IX_COUNT];
53         // TODO: read indexes[IX_STRING_TRIE_OFFSET] first, then read a variable-length indexes[]
54         for (int i = 0; i < IX_COUNT; i++) {
55             indexes[i] = bytes.getInt();
56         }
57         int offset = indexes[IX_STRING_TRIE_OFFSET];
58         Assert.assrt(offset >= (4 * IX_COUNT));
59         if (offset > (4 * IX_COUNT)) {
60             int diff = offset - (4 * IX_COUNT);
61             ICUBinary.skipBytes(bytes, diff);
62         }
63         int trieType = indexes[IX_TRIE_TYPE] & TRIE_TYPE_MASK;
64         int totalSize = indexes[IX_TOTAL_SIZE] - offset;
65         DictionaryMatcher m = null;
66         if (trieType == TRIE_TYPE_BYTES) {
67             int transform = indexes[IX_TRANSFORM];
68             byte[] data = new byte[totalSize];
69             bytes.get(data);
70             m = new BytesDictionaryMatcher(data, transform);
71         } else if (trieType == TRIE_TYPE_UCHARS) {
72             Assert.assrt(totalSize % 2 == 0);
73             String data = ICUBinary.getString(bytes, totalSize / 2, totalSize & 1);
74             m = new CharsDictionaryMatcher(data);
75         } else {
76             m = null;
77         }
78         return m;
79     }
80 }
81