• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (C) 2009 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 package com.android.inputmethod.latin;
18 
19 import android.content.Context;
20 
21 /**
22  * Base class for an in-memory dictionary that can grow dynamically and can
23  * be searched for suggestions and valid words.
24  */
25 public class ExpandableDictionary extends Dictionary {
26     private Context mContext;
27     private char[] mWordBuilder = new char[MAX_WORD_LENGTH];
28     private int mMaxDepth;
29     private int mInputLength;
30 
31     public static final int MAX_WORD_LENGTH = 32;
32     private static final char QUOTE = '\'';
33 
34     static class Node {
35         char code;
36         int frequency;
37         boolean terminal;
38         NodeArray children;
39     }
40 
41     static class NodeArray {
42         Node[] data;
43         int length = 0;
44         private static final int INCREMENT = 2;
45 
NodeArray()46         NodeArray() {
47             data = new Node[INCREMENT];
48         }
49 
add(Node n)50         void add(Node n) {
51             if (length + 1 > data.length) {
52                 Node[] tempData = new Node[length + INCREMENT];
53                 if (length > 0) {
54                     System.arraycopy(data, 0, tempData, 0, length);
55                 }
56                 data = tempData;
57             }
58             data[length++] = n;
59         }
60     }
61 
62     private NodeArray mRoots;
63 
64     private int[][] mCodes;
65 
ExpandableDictionary(Context context)66     ExpandableDictionary(Context context) {
67         mContext = context;
68         clearDictionary();
69         mCodes = new int[MAX_WORD_LENGTH][];
70     }
71 
getContext()72     Context getContext() {
73         return mContext;
74     }
75 
getMaxWordLength()76     int getMaxWordLength() {
77         return MAX_WORD_LENGTH;
78     }
79 
addWord(String word, int frequency)80     public void addWord(String word, int frequency) {
81         addWordRec(mRoots, word, 0, frequency);
82     }
83 
addWordRec(NodeArray children, final String word, final int depth, final int frequency)84     private void addWordRec(NodeArray children, final String word,
85             final int depth, final int frequency) {
86 
87         final int wordLength = word.length();
88         final char c = word.charAt(depth);
89         // Does children have the current character?
90         final int childrenLength = children.length;
91         Node childNode = null;
92         boolean found = false;
93         for (int i = 0; i < childrenLength; i++) {
94             childNode = children.data[i];
95             if (childNode.code == c) {
96                 found = true;
97                 break;
98             }
99         }
100         if (!found) {
101             childNode = new Node();
102             childNode.code = c;
103             children.add(childNode);
104         }
105         if (wordLength == depth + 1) {
106             // Terminate this word
107             childNode.terminal = true;
108             childNode.frequency += frequency; // If there are multiple similar words
109             if (childNode.frequency > 256) childNode.frequency = 256;
110             return;
111         }
112         if (childNode.children == null) {
113             childNode.children = new NodeArray();
114         }
115         addWordRec(childNode.children, word, depth + 1, frequency);
116     }
117 
118     @Override
getWords(final WordComposer codes, final WordCallback callback)119     public void getWords(final WordComposer codes, final WordCallback callback) {
120         mInputLength = codes.size();
121         if (mCodes.length < mInputLength) mCodes = new int[mInputLength][];
122         // Cache the codes so that we don't have to lookup an array list
123         for (int i = 0; i < mInputLength; i++) {
124             mCodes[i] = codes.getCodesAt(i);
125         }
126         mMaxDepth = mInputLength * 3;
127         getWordsRec(mRoots, codes, mWordBuilder, 0, false, 1, 0, -1, callback);
128         for (int i = 0; i < mInputLength; i++) {
129             getWordsRec(mRoots, codes, mWordBuilder, 0, false, 1, 0, i, callback);
130         }
131     }
132 
133     @Override
isValidWord(CharSequence word)134     public synchronized boolean isValidWord(CharSequence word) {
135         final int freq = getWordFrequencyRec(mRoots, word, 0, word.length());
136         return freq > -1;
137     }
138 
139     /**
140      * Returns the word's frequency or -1 if not found
141      */
getWordFrequency(CharSequence word)142     public int getWordFrequency(CharSequence word) {
143         return getWordFrequencyRec(mRoots, word, 0, word.length());
144     }
145 
146     /**
147      * Returns the word's frequency or -1 if not found
148      */
getWordFrequencyRec(final NodeArray children, final CharSequence word, final int offset, final int length)149     private int getWordFrequencyRec(final NodeArray children, final CharSequence word,
150             final int offset, final int length) {
151         final int count = children.length;
152         char currentChar = word.charAt(offset);
153         for (int j = 0; j < count; j++) {
154             final Node node = children.data[j];
155             if (node.code == currentChar) {
156                 if (offset == length - 1) {
157                     if (node.terminal) {
158                         return node.frequency;
159                     }
160                 } else {
161                     if (node.children != null) {
162                         int freq = getWordFrequencyRec(node.children, word, offset + 1, length);
163                         if (freq > -1) return freq;
164                     }
165                 }
166             }
167         }
168         return -1;
169     }
170 
171     /**
172      * Recursively traverse the tree for words that match the input. Input consists of
173      * a list of arrays. Each item in the list is one input character position. An input
174      * character is actually an array of multiple possible candidates. This function is not
175      * optimized for speed, assuming that the user dictionary will only be a few hundred words in
176      * size.
177      * @param roots node whose children have to be search for matches
178      * @param codes the input character codes
179      * @param word the word being composed as a possible match
180      * @param depth the depth of traversal - the length of the word being composed thus far
181      * @param completion whether the traversal is now in completion mode - meaning that we've
182      * exhausted the input and we're looking for all possible suffixes.
183      * @param snr current weight of the word being formed
184      * @param inputIndex position in the input characters. This can be off from the depth in
185      * case we skip over some punctuations such as apostrophe in the traversal. That is, if you type
186      * "wouldve", it could be matching "would've", so the depth will be one more than the
187      * inputIndex
188      * @param callback the callback class for adding a word
189      */
getWordsRec(NodeArray roots, final WordComposer codes, final char[] word, final int depth, boolean completion, int snr, int inputIndex, int skipPos, WordCallback callback)190     protected void getWordsRec(NodeArray roots, final WordComposer codes, final char[] word,
191             final int depth, boolean completion, int snr, int inputIndex, int skipPos,
192             WordCallback callback) {
193         final int count = roots.length;
194         final int codeSize = mInputLength;
195         // Optimization: Prune out words that are too long compared to how much was typed.
196         if (depth > mMaxDepth) {
197             return;
198         }
199         int[] currentChars = null;
200         if (codeSize <= inputIndex) {
201             completion = true;
202         } else {
203             currentChars = mCodes[inputIndex];
204         }
205 
206         for (int i = 0; i < count; i++) {
207             final Node node = roots.data[i];
208             final char c = node.code;
209             final char lowerC = toLowerCase(c);
210             final boolean terminal = node.terminal;
211             final NodeArray children = node.children;
212             final int freq = node.frequency;
213             if (completion) {
214                 word[depth] = c;
215                 if (terminal) {
216                     if (!callback.addWord(word, 0, depth + 1, freq * snr)) {
217                         return;
218                     }
219                 }
220                 if (children != null) {
221                     getWordsRec(children, codes, word, depth + 1, completion, snr, inputIndex,
222                             skipPos, callback);
223                 }
224             } else if ((c == QUOTE && currentChars[0] != QUOTE) || depth == skipPos) {
225                 // Skip the ' and continue deeper
226                 word[depth] = c;
227                 if (children != null) {
228                     getWordsRec(children, codes, word, depth + 1, completion, snr, inputIndex,
229                             skipPos, callback);
230                 }
231             } else {
232                 // Don't use alternatives if we're looking for missing characters
233                 final int alternativesSize = skipPos >= 0? 1 : currentChars.length;
234                 for (int j = 0; j < alternativesSize; j++) {
235                     final int addedAttenuation = (j > 0 ? 1 : 2);
236                     final int currentChar = currentChars[j];
237                     if (currentChar == -1) {
238                         break;
239                     }
240                     if (currentChar == lowerC || currentChar == c) {
241                         word[depth] = c;
242 
243                         if (codeSize == depth + 1) {
244                             if (terminal) {
245                                 if (INCLUDE_TYPED_WORD_IF_VALID
246                                         || !same(word, depth + 1, codes.getTypedWord())) {
247                                     int finalFreq = freq * snr * addedAttenuation;
248                                     if (skipPos < 0) finalFreq *= FULL_WORD_FREQ_MULTIPLIER;
249                                     callback.addWord(word, 0, depth + 1, finalFreq);
250                                 }
251                             }
252                             if (children != null) {
253                                 getWordsRec(children, codes, word, depth + 1,
254                                         true, snr * addedAttenuation, inputIndex + 1,
255                                         skipPos, callback);
256                             }
257                         } else if (children != null) {
258                             getWordsRec(children, codes, word, depth + 1,
259                                     false, snr * addedAttenuation, inputIndex + 1,
260                                     skipPos, callback);
261                         }
262                     }
263                 }
264             }
265         }
266     }
267 
clearDictionary()268     protected void clearDictionary() {
269         mRoots = new NodeArray();
270     }
271 
toLowerCase(char c)272     static char toLowerCase(char c) {
273         if (c < BASE_CHARS.length) {
274             c = BASE_CHARS[c];
275         }
276         if (c >= 'A' && c <= 'Z') {
277             c = (char) (c | 32);
278         } else if (c > 127) {
279             c = Character.toLowerCase(c);
280         }
281         return c;
282     }
283 
    /**
     * Table mapping most combined Latin, Greek, and Cyrillic characters
     * to their base characters. For every c covered by the table
     * (c < BASE_CHARS.length), BASE_CHARS[c] is c itself when c is not a
     * combined character, and the base character of c when it is.
     */
290     static final char BASE_CHARS[] = {
291         0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007,
292         0x0008, 0x0009, 0x000a, 0x000b, 0x000c, 0x000d, 0x000e, 0x000f,
293         0x0010, 0x0011, 0x0012, 0x0013, 0x0014, 0x0015, 0x0016, 0x0017,
294         0x0018, 0x0019, 0x001a, 0x001b, 0x001c, 0x001d, 0x001e, 0x001f,
295         0x0020, 0x0021, 0x0022, 0x0023, 0x0024, 0x0025, 0x0026, 0x0027,
296         0x0028, 0x0029, 0x002a, 0x002b, 0x002c, 0x002d, 0x002e, 0x002f,
297         0x0030, 0x0031, 0x0032, 0x0033, 0x0034, 0x0035, 0x0036, 0x0037,
298         0x0038, 0x0039, 0x003a, 0x003b, 0x003c, 0x003d, 0x003e, 0x003f,
299         0x0040, 0x0041, 0x0042, 0x0043, 0x0044, 0x0045, 0x0046, 0x0047,
300         0x0048, 0x0049, 0x004a, 0x004b, 0x004c, 0x004d, 0x004e, 0x004f,
301         0x0050, 0x0051, 0x0052, 0x0053, 0x0054, 0x0055, 0x0056, 0x0057,
302         0x0058, 0x0059, 0x005a, 0x005b, 0x005c, 0x005d, 0x005e, 0x005f,
303         0x0060, 0x0061, 0x0062, 0x0063, 0x0064, 0x0065, 0x0066, 0x0067,
304         0x0068, 0x0069, 0x006a, 0x006b, 0x006c, 0x006d, 0x006e, 0x006f,
305         0x0070, 0x0071, 0x0072, 0x0073, 0x0074, 0x0075, 0x0076, 0x0077,
306         0x0078, 0x0079, 0x007a, 0x007b, 0x007c, 0x007d, 0x007e, 0x007f,
307         0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
308         0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
309         0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
310         0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
311         0x0020, 0x00a1, 0x00a2, 0x00a3, 0x00a4, 0x00a5, 0x00a6, 0x00a7,
312         0x0020, 0x00a9, 0x0061, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x0020,
313         0x00b0, 0x00b1, 0x0032, 0x0033, 0x0020, 0x03bc, 0x00b6, 0x00b7,
314         0x0020, 0x0031, 0x006f, 0x00bb, 0x0031, 0x0031, 0x0033, 0x00bf,
315         0x0041, 0x0041, 0x0041, 0x0041, 0x0041, 0x0041, 0x00c6, 0x0043,
316         0x0045, 0x0045, 0x0045, 0x0045, 0x0049, 0x0049, 0x0049, 0x0049,
317         0x00d0, 0x004e, 0x004f, 0x004f, 0x004f, 0x004f, 0x004f, 0x00d7,
318         0x004f, 0x0055, 0x0055, 0x0055, 0x0055, 0x0059, 0x00de, 0x0073, // Manually changed d8 to 4f
319                                                                         // Manually changed df to 73
320         0x0061, 0x0061, 0x0061, 0x0061, 0x0061, 0x0061, 0x00e6, 0x0063,
321         0x0065, 0x0065, 0x0065, 0x0065, 0x0069, 0x0069, 0x0069, 0x0069,
322         0x00f0, 0x006e, 0x006f, 0x006f, 0x006f, 0x006f, 0x006f, 0x00f7,
323         0x006f, 0x0075, 0x0075, 0x0075, 0x0075, 0x0079, 0x00fe, 0x0079, // Manually changed f8 to 6f
324         0x0041, 0x0061, 0x0041, 0x0061, 0x0041, 0x0061, 0x0043, 0x0063,
325         0x0043, 0x0063, 0x0043, 0x0063, 0x0043, 0x0063, 0x0044, 0x0064,
326         0x0110, 0x0111, 0x0045, 0x0065, 0x0045, 0x0065, 0x0045, 0x0065,
327         0x0045, 0x0065, 0x0045, 0x0065, 0x0047, 0x0067, 0x0047, 0x0067,
328         0x0047, 0x0067, 0x0047, 0x0067, 0x0048, 0x0068, 0x0126, 0x0127,
329         0x0049, 0x0069, 0x0049, 0x0069, 0x0049, 0x0069, 0x0049, 0x0069,
330         0x0049, 0x0131, 0x0049, 0x0069, 0x004a, 0x006a, 0x004b, 0x006b,
331         0x0138, 0x004c, 0x006c, 0x004c, 0x006c, 0x004c, 0x006c, 0x004c,
332         0x006c, 0x0141, 0x0142, 0x004e, 0x006e, 0x004e, 0x006e, 0x004e,
333         0x006e, 0x02bc, 0x014a, 0x014b, 0x004f, 0x006f, 0x004f, 0x006f,
334         0x004f, 0x006f, 0x0152, 0x0153, 0x0052, 0x0072, 0x0052, 0x0072,
335         0x0052, 0x0072, 0x0053, 0x0073, 0x0053, 0x0073, 0x0053, 0x0073,
336         0x0053, 0x0073, 0x0054, 0x0074, 0x0054, 0x0074, 0x0166, 0x0167,
337         0x0055, 0x0075, 0x0055, 0x0075, 0x0055, 0x0075, 0x0055, 0x0075,
338         0x0055, 0x0075, 0x0055, 0x0075, 0x0057, 0x0077, 0x0059, 0x0079,
339         0x0059, 0x005a, 0x007a, 0x005a, 0x007a, 0x005a, 0x007a, 0x0073,
340         0x0180, 0x0181, 0x0182, 0x0183, 0x0184, 0x0185, 0x0186, 0x0187,
341         0x0188, 0x0189, 0x018a, 0x018b, 0x018c, 0x018d, 0x018e, 0x018f,
342         0x0190, 0x0191, 0x0192, 0x0193, 0x0194, 0x0195, 0x0196, 0x0197,
343         0x0198, 0x0199, 0x019a, 0x019b, 0x019c, 0x019d, 0x019e, 0x019f,
344         0x004f, 0x006f, 0x01a2, 0x01a3, 0x01a4, 0x01a5, 0x01a6, 0x01a7,
345         0x01a8, 0x01a9, 0x01aa, 0x01ab, 0x01ac, 0x01ad, 0x01ae, 0x0055,
346         0x0075, 0x01b1, 0x01b2, 0x01b3, 0x01b4, 0x01b5, 0x01b6, 0x01b7,
347         0x01b8, 0x01b9, 0x01ba, 0x01bb, 0x01bc, 0x01bd, 0x01be, 0x01bf,
348         0x01c0, 0x01c1, 0x01c2, 0x01c3, 0x0044, 0x0044, 0x0064, 0x004c,
349         0x004c, 0x006c, 0x004e, 0x004e, 0x006e, 0x0041, 0x0061, 0x0049,
350         0x0069, 0x004f, 0x006f, 0x0055, 0x0075, 0x00dc, 0x00fc, 0x00dc,
351         0x00fc, 0x00dc, 0x00fc, 0x00dc, 0x00fc, 0x01dd, 0x00c4, 0x00e4,
352         0x0226, 0x0227, 0x00c6, 0x00e6, 0x01e4, 0x01e5, 0x0047, 0x0067,
353         0x004b, 0x006b, 0x004f, 0x006f, 0x01ea, 0x01eb, 0x01b7, 0x0292,
354         0x006a, 0x0044, 0x0044, 0x0064, 0x0047, 0x0067, 0x01f6, 0x01f7,
355         0x004e, 0x006e, 0x00c5, 0x00e5, 0x00c6, 0x00e6, 0x00d8, 0x00f8,
356         0x0041, 0x0061, 0x0041, 0x0061, 0x0045, 0x0065, 0x0045, 0x0065,
357         0x0049, 0x0069, 0x0049, 0x0069, 0x004f, 0x006f, 0x004f, 0x006f,
358         0x0052, 0x0072, 0x0052, 0x0072, 0x0055, 0x0075, 0x0055, 0x0075,
359         0x0053, 0x0073, 0x0054, 0x0074, 0x021c, 0x021d, 0x0048, 0x0068,
360         0x0220, 0x0221, 0x0222, 0x0223, 0x0224, 0x0225, 0x0041, 0x0061,
361         0x0045, 0x0065, 0x00d6, 0x00f6, 0x00d5, 0x00f5, 0x004f, 0x006f,
362         0x022e, 0x022f, 0x0059, 0x0079, 0x0234, 0x0235, 0x0236, 0x0237,
363         0x0238, 0x0239, 0x023a, 0x023b, 0x023c, 0x023d, 0x023e, 0x023f,
364         0x0240, 0x0241, 0x0242, 0x0243, 0x0244, 0x0245, 0x0246, 0x0247,
365         0x0248, 0x0249, 0x024a, 0x024b, 0x024c, 0x024d, 0x024e, 0x024f,
366         0x0250, 0x0251, 0x0252, 0x0253, 0x0254, 0x0255, 0x0256, 0x0257,
367         0x0258, 0x0259, 0x025a, 0x025b, 0x025c, 0x025d, 0x025e, 0x025f,
368         0x0260, 0x0261, 0x0262, 0x0263, 0x0264, 0x0265, 0x0266, 0x0267,
369         0x0268, 0x0269, 0x026a, 0x026b, 0x026c, 0x026d, 0x026e, 0x026f,
370         0x0270, 0x0271, 0x0272, 0x0273, 0x0274, 0x0275, 0x0276, 0x0277,
371         0x0278, 0x0279, 0x027a, 0x027b, 0x027c, 0x027d, 0x027e, 0x027f,
372         0x0280, 0x0281, 0x0282, 0x0283, 0x0284, 0x0285, 0x0286, 0x0287,
373         0x0288, 0x0289, 0x028a, 0x028b, 0x028c, 0x028d, 0x028e, 0x028f,
374         0x0290, 0x0291, 0x0292, 0x0293, 0x0294, 0x0295, 0x0296, 0x0297,
375         0x0298, 0x0299, 0x029a, 0x029b, 0x029c, 0x029d, 0x029e, 0x029f,
376         0x02a0, 0x02a1, 0x02a2, 0x02a3, 0x02a4, 0x02a5, 0x02a6, 0x02a7,
377         0x02a8, 0x02a9, 0x02aa, 0x02ab, 0x02ac, 0x02ad, 0x02ae, 0x02af,
378         0x0068, 0x0266, 0x006a, 0x0072, 0x0279, 0x027b, 0x0281, 0x0077,
379         0x0079, 0x02b9, 0x02ba, 0x02bb, 0x02bc, 0x02bd, 0x02be, 0x02bf,
380         0x02c0, 0x02c1, 0x02c2, 0x02c3, 0x02c4, 0x02c5, 0x02c6, 0x02c7,
381         0x02c8, 0x02c9, 0x02ca, 0x02cb, 0x02cc, 0x02cd, 0x02ce, 0x02cf,
382         0x02d0, 0x02d1, 0x02d2, 0x02d3, 0x02d4, 0x02d5, 0x02d6, 0x02d7,
383         0x0020, 0x0020, 0x0020, 0x0020, 0x0020, 0x0020, 0x02de, 0x02df,
384         0x0263, 0x006c, 0x0073, 0x0078, 0x0295, 0x02e5, 0x02e6, 0x02e7,
385         0x02e8, 0x02e9, 0x02ea, 0x02eb, 0x02ec, 0x02ed, 0x02ee, 0x02ef,
386         0x02f0, 0x02f1, 0x02f2, 0x02f3, 0x02f4, 0x02f5, 0x02f6, 0x02f7,
387         0x02f8, 0x02f9, 0x02fa, 0x02fb, 0x02fc, 0x02fd, 0x02fe, 0x02ff,
388         0x0300, 0x0301, 0x0302, 0x0303, 0x0304, 0x0305, 0x0306, 0x0307,
389         0x0308, 0x0309, 0x030a, 0x030b, 0x030c, 0x030d, 0x030e, 0x030f,
390         0x0310, 0x0311, 0x0312, 0x0313, 0x0314, 0x0315, 0x0316, 0x0317,
391         0x0318, 0x0319, 0x031a, 0x031b, 0x031c, 0x031d, 0x031e, 0x031f,
392         0x0320, 0x0321, 0x0322, 0x0323, 0x0324, 0x0325, 0x0326, 0x0327,
393         0x0328, 0x0329, 0x032a, 0x032b, 0x032c, 0x032d, 0x032e, 0x032f,
394         0x0330, 0x0331, 0x0332, 0x0333, 0x0334, 0x0335, 0x0336, 0x0337,
395         0x0338, 0x0339, 0x033a, 0x033b, 0x033c, 0x033d, 0x033e, 0x033f,
396         0x0300, 0x0301, 0x0342, 0x0313, 0x0308, 0x0345, 0x0346, 0x0347,
397         0x0348, 0x0349, 0x034a, 0x034b, 0x034c, 0x034d, 0x034e, 0x034f,
398         0x0350, 0x0351, 0x0352, 0x0353, 0x0354, 0x0355, 0x0356, 0x0357,
399         0x0358, 0x0359, 0x035a, 0x035b, 0x035c, 0x035d, 0x035e, 0x035f,
400         0x0360, 0x0361, 0x0362, 0x0363, 0x0364, 0x0365, 0x0366, 0x0367,
401         0x0368, 0x0369, 0x036a, 0x036b, 0x036c, 0x036d, 0x036e, 0x036f,
402         0x0370, 0x0371, 0x0372, 0x0373, 0x02b9, 0x0375, 0x0376, 0x0377,
403         0x0378, 0x0379, 0x0020, 0x037b, 0x037c, 0x037d, 0x003b, 0x037f,
404         0x0380, 0x0381, 0x0382, 0x0383, 0x0020, 0x00a8, 0x0391, 0x00b7,
405         0x0395, 0x0397, 0x0399, 0x038b, 0x039f, 0x038d, 0x03a5, 0x03a9,
406         0x03ca, 0x0391, 0x0392, 0x0393, 0x0394, 0x0395, 0x0396, 0x0397,
407         0x0398, 0x0399, 0x039a, 0x039b, 0x039c, 0x039d, 0x039e, 0x039f,
408         0x03a0, 0x03a1, 0x03a2, 0x03a3, 0x03a4, 0x03a5, 0x03a6, 0x03a7,
409         0x03a8, 0x03a9, 0x0399, 0x03a5, 0x03b1, 0x03b5, 0x03b7, 0x03b9,
410         0x03cb, 0x03b1, 0x03b2, 0x03b3, 0x03b4, 0x03b5, 0x03b6, 0x03b7,
411         0x03b8, 0x03b9, 0x03ba, 0x03bb, 0x03bc, 0x03bd, 0x03be, 0x03bf,
412         0x03c0, 0x03c1, 0x03c2, 0x03c3, 0x03c4, 0x03c5, 0x03c6, 0x03c7,
413         0x03c8, 0x03c9, 0x03b9, 0x03c5, 0x03bf, 0x03c5, 0x03c9, 0x03cf,
414         0x03b2, 0x03b8, 0x03a5, 0x03d2, 0x03d2, 0x03c6, 0x03c0, 0x03d7,
415         0x03d8, 0x03d9, 0x03da, 0x03db, 0x03dc, 0x03dd, 0x03de, 0x03df,
416         0x03e0, 0x03e1, 0x03e2, 0x03e3, 0x03e4, 0x03e5, 0x03e6, 0x03e7,
417         0x03e8, 0x03e9, 0x03ea, 0x03eb, 0x03ec, 0x03ed, 0x03ee, 0x03ef,
418         0x03ba, 0x03c1, 0x03c2, 0x03f3, 0x0398, 0x03b5, 0x03f6, 0x03f7,
419         0x03f8, 0x03a3, 0x03fa, 0x03fb, 0x03fc, 0x03fd, 0x03fe, 0x03ff,
420         0x0415, 0x0415, 0x0402, 0x0413, 0x0404, 0x0405, 0x0406, 0x0406,
421         0x0408, 0x0409, 0x040a, 0x040b, 0x041a, 0x0418, 0x0423, 0x040f,
422         0x0410, 0x0411, 0x0412, 0x0413, 0x0414, 0x0415, 0x0416, 0x0417,
423         0x0418, 0x0418, 0x041a, 0x041b, 0x041c, 0x041d, 0x041e, 0x041f,
424         0x0420, 0x0421, 0x0422, 0x0423, 0x0424, 0x0425, 0x0426, 0x0427,
425         0x0428, 0x0429, 0x042a, 0x042b, 0x042c, 0x042d, 0x042e, 0x042f,
426         0x0430, 0x0431, 0x0432, 0x0433, 0x0434, 0x0435, 0x0436, 0x0437,
427         0x0438, 0x0438, 0x043a, 0x043b, 0x043c, 0x043d, 0x043e, 0x043f,
428         0x0440, 0x0441, 0x0442, 0x0443, 0x0444, 0x0445, 0x0446, 0x0447,
429         0x0448, 0x0449, 0x044a, 0x044b, 0x044c, 0x044d, 0x044e, 0x044f,
430         0x0435, 0x0435, 0x0452, 0x0433, 0x0454, 0x0455, 0x0456, 0x0456,
431         0x0458, 0x0459, 0x045a, 0x045b, 0x043a, 0x0438, 0x0443, 0x045f,
432         0x0460, 0x0461, 0x0462, 0x0463, 0x0464, 0x0465, 0x0466, 0x0467,
433         0x0468, 0x0469, 0x046a, 0x046b, 0x046c, 0x046d, 0x046e, 0x046f,
434         0x0470, 0x0471, 0x0472, 0x0473, 0x0474, 0x0475, 0x0474, 0x0475,
435         0x0478, 0x0479, 0x047a, 0x047b, 0x047c, 0x047d, 0x047e, 0x047f,
436         0x0480, 0x0481, 0x0482, 0x0483, 0x0484, 0x0485, 0x0486, 0x0487,
437         0x0488, 0x0489, 0x048a, 0x048b, 0x048c, 0x048d, 0x048e, 0x048f,
438         0x0490, 0x0491, 0x0492, 0x0493, 0x0494, 0x0495, 0x0496, 0x0497,
439         0x0498, 0x0499, 0x049a, 0x049b, 0x049c, 0x049d, 0x049e, 0x049f,
440         0x04a0, 0x04a1, 0x04a2, 0x04a3, 0x04a4, 0x04a5, 0x04a6, 0x04a7,
441         0x04a8, 0x04a9, 0x04aa, 0x04ab, 0x04ac, 0x04ad, 0x04ae, 0x04af,
442         0x04b0, 0x04b1, 0x04b2, 0x04b3, 0x04b4, 0x04b5, 0x04b6, 0x04b7,
443         0x04b8, 0x04b9, 0x04ba, 0x04bb, 0x04bc, 0x04bd, 0x04be, 0x04bf,
444         0x04c0, 0x0416, 0x0436, 0x04c3, 0x04c4, 0x04c5, 0x04c6, 0x04c7,
445         0x04c8, 0x04c9, 0x04ca, 0x04cb, 0x04cc, 0x04cd, 0x04ce, 0x04cf,
446         0x0410, 0x0430, 0x0410, 0x0430, 0x04d4, 0x04d5, 0x0415, 0x0435,
447         0x04d8, 0x04d9, 0x04d8, 0x04d9, 0x0416, 0x0436, 0x0417, 0x0437,
448         0x04e0, 0x04e1, 0x0418, 0x0438, 0x0418, 0x0438, 0x041e, 0x043e,
449         0x04e8, 0x04e9, 0x04e8, 0x04e9, 0x042d, 0x044d, 0x0423, 0x0443,
450         0x0423, 0x0443, 0x0423, 0x0443, 0x0427, 0x0447, 0x04f6, 0x04f7,
451         0x042b, 0x044b, 0x04fa, 0x04fb, 0x04fc, 0x04fd, 0x04fe, 0x04ff,
452     };
453 
454     // generated with:
455     // cat UnicodeData.txt | perl -e 'while (<>) { @foo = split(/;/); $foo[5] =~ s/<.*> //; $base[hex($foo[0])] = hex($foo[5]);} for ($i = 0; $i < 0x500; $i += 8) { for ($j = $i; $j < $i + 8; $j++) { printf("0x%04x, ", $base[$j] ? $base[$j] : $j)}; print "\n"; }'
456 
457 }
458