• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (C) 2009 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 package com.android.inputmethod.latin;
18 
19 import android.content.Context;
20 
21 import com.android.inputmethod.keyboard.Keyboard;
22 import com.android.inputmethod.keyboard.ProximityInfo;
23 
24 import java.util.LinkedList;
25 
26 /**
27  * Base class for an in-memory dictionary that can grow dynamically and can
28  * be searched for suggestions and valid words.
29  */
30 public class ExpandableDictionary extends Dictionary {
31     /**
32      * There is difference between what java and native code can handle.
33      * It uses 32 because Java stack overflows when greater value is used.
34      */
35     protected static final int MAX_WORD_LENGTH = 32;
36 
37     // Bigram frequency is a fixed point number with 1 meaning 1.2 and 255 meaning 1.8.
38     protected static final int BIGRAM_MAX_FREQUENCY = 255;
39 
40     private Context mContext;
41     private char[] mWordBuilder = new char[MAX_WORD_LENGTH];
42     private int mDicTypeId;
43     private int mMaxDepth;
44     private int mInputLength;
45 
46     private boolean mRequiresReload;
47 
48     private boolean mUpdatingDictionary;
49 
50     // Use this lock before touching mUpdatingDictionary & mRequiresDownload
51     private Object mUpdatingLock = new Object();
52 
53     private static class Node {
54         char mCode;
55         int mFrequency;
56         boolean mTerminal;
57         Node mParent;
58         NodeArray mChildren;
59         LinkedList<NextWord> mNGrams; // Supports ngram
60     }
61 
62     private static class NodeArray {
63         Node[] mData;
64         int mLength = 0;
65         private static final int INCREMENT = 2;
66 
NodeArray()67         NodeArray() {
68             mData = new Node[INCREMENT];
69         }
70 
add(Node n)71         void add(Node n) {
72             if (mLength + 1 > mData.length) {
73                 Node[] tempData = new Node[mLength + INCREMENT];
74                 if (mLength > 0) {
75                     System.arraycopy(mData, 0, tempData, 0, mLength);
76                 }
77                 mData = tempData;
78             }
79             mData[mLength++] = n;
80         }
81     }
82 
83     private static class NextWord {
84         public final Node mWord;
85         private int mFrequency;
86 
NextWord(Node word, int frequency)87         public NextWord(Node word, int frequency) {
88             mWord = word;
89             mFrequency = frequency;
90         }
91 
getFrequency()92         public int getFrequency() {
93             return mFrequency;
94         }
95 
setFrequency(int freq)96         public int setFrequency(int freq) {
97             mFrequency = freq;
98             return mFrequency;
99         }
100 
addFrequency(int add)101         public int addFrequency(int add) {
102             mFrequency += add;
103             if (mFrequency > BIGRAM_MAX_FREQUENCY) mFrequency = BIGRAM_MAX_FREQUENCY;
104             return mFrequency;
105         }
106     }
107 
108     private NodeArray mRoots;
109 
110     private int[][] mCodes;
111 
ExpandableDictionary(Context context, int dicTypeId)112     public ExpandableDictionary(Context context, int dicTypeId) {
113         mContext = context;
114         clearDictionary();
115         mCodes = new int[MAX_WORD_LENGTH][];
116         mDicTypeId = dicTypeId;
117     }
118 
loadDictionary()119     public void loadDictionary() {
120         synchronized (mUpdatingLock) {
121             startDictionaryLoadingTaskLocked();
122         }
123     }
124 
startDictionaryLoadingTaskLocked()125     public void startDictionaryLoadingTaskLocked() {
126         if (!mUpdatingDictionary) {
127             mUpdatingDictionary = true;
128             mRequiresReload = false;
129             new LoadDictionaryTask().start();
130         }
131     }
132 
setRequiresReload(boolean reload)133     public void setRequiresReload(boolean reload) {
134         synchronized (mUpdatingLock) {
135             mRequiresReload = reload;
136         }
137     }
138 
getRequiresReload()139     public boolean getRequiresReload() {
140         return mRequiresReload;
141     }
142 
143     /** Override to load your dictionary here, on a background thread. */
loadDictionaryAsync()144     public void loadDictionaryAsync() {
145         // empty base implementation
146     }
147 
getContext()148     public Context getContext() {
149         return mContext;
150     }
151 
getMaxWordLength()152     public int getMaxWordLength() {
153         return MAX_WORD_LENGTH;
154     }
155 
addWord(String word, int frequency)156     public void addWord(String word, int frequency) {
157         addWordRec(mRoots, word, 0, frequency, null);
158     }
159 
addWordRec(NodeArray children, final String word, final int depth, final int frequency, Node parentNode)160     private void addWordRec(NodeArray children, final String word, final int depth,
161             final int frequency, Node parentNode) {
162         final int wordLength = word.length();
163         if (wordLength <= depth) return;
164         final char c = word.charAt(depth);
165         // Does children have the current character?
166         final int childrenLength = children.mLength;
167         Node childNode = null;
168         for (int i = 0; i < childrenLength; i++) {
169             final Node node = children.mData[i];
170             if (node.mCode == c) {
171                 childNode = node;
172                 break;
173             }
174         }
175         if (childNode == null) {
176             childNode = new Node();
177             childNode.mCode = c;
178             childNode.mParent = parentNode;
179             children.add(childNode);
180         }
181         if (wordLength == depth + 1) {
182             // Terminate this word
183             childNode.mTerminal = true;
184             childNode.mFrequency = Math.max(frequency, childNode.mFrequency);
185             if (childNode.mFrequency > 255) childNode.mFrequency = 255;
186             return;
187         }
188         if (childNode.mChildren == null) {
189             childNode.mChildren = new NodeArray();
190         }
191         addWordRec(childNode.mChildren, word, depth + 1, frequency, childNode);
192     }
193 
194     @Override
getWords(final WordComposer codes, final WordCallback callback, final ProximityInfo proximityInfo)195     public void getWords(final WordComposer codes, final WordCallback callback,
196             final ProximityInfo proximityInfo) {
197         synchronized (mUpdatingLock) {
198             // If we need to update, start off a background task
199             if (mRequiresReload) startDictionaryLoadingTaskLocked();
200             // Currently updating contacts, don't return any results.
201             if (mUpdatingDictionary) return;
202         }
203         getWordsInner(codes, callback, proximityInfo);
204     }
205 
getWordsInner(final WordComposer codes, final WordCallback callback, @SuppressWarnings("unused") final ProximityInfo proximityInfo)206     protected final void getWordsInner(final WordComposer codes, final WordCallback callback,
207             @SuppressWarnings("unused") final ProximityInfo proximityInfo) {
208         mInputLength = codes.size();
209         if (mCodes.length < mInputLength) mCodes = new int[mInputLength][];
210         // Cache the codes so that we don't have to lookup an array list
211         for (int i = 0; i < mInputLength; i++) {
212             mCodes[i] = codes.getCodesAt(i);
213         }
214         mMaxDepth = mInputLength * 3;
215         getWordsRec(mRoots, codes, mWordBuilder, 0, false, 1, 0, -1, callback);
216         for (int i = 0; i < mInputLength; i++) {
217             getWordsRec(mRoots, codes, mWordBuilder, 0, false, 1, 0, i, callback);
218         }
219     }
220 
221     @Override
isValidWord(CharSequence word)222     public synchronized boolean isValidWord(CharSequence word) {
223         synchronized (mUpdatingLock) {
224             // If we need to update, start off a background task
225             if (mRequiresReload) startDictionaryLoadingTaskLocked();
226             if (mUpdatingDictionary) return false;
227         }
228         return getWordFrequency(word) > -1;
229     }
230 
231     /**
232      * Returns the word's frequency or -1 if not found
233      */
getWordFrequency(CharSequence word)234     protected int getWordFrequency(CharSequence word) {
235         // Case-sensitive search
236         Node node = searchNode(mRoots, word, 0, word.length());
237         return (node == null) ? -1 : node.mFrequency;
238     }
239 
computeSkippedWordFinalFreq(int freq, int snr, int inputLength)240     private static int computeSkippedWordFinalFreq(int freq, int snr, int inputLength) {
241         // The computation itself makes sense for >= 2, but the == 2 case returns 0
242         // anyway so we may as well test against 3 instead and return the constant
243         if (inputLength >= 3) {
244             return (freq * snr * (inputLength - 2)) / (inputLength - 1);
245         } else {
246             return 0;
247         }
248     }
249 
250     /**
251      * Recursively traverse the tree for words that match the input. Input consists of
252      * a list of arrays. Each item in the list is one input character position. An input
253      * character is actually an array of multiple possible candidates. This function is not
254      * optimized for speed, assuming that the user dictionary will only be a few hundred words in
255      * size.
256      * @param roots node whose children have to be search for matches
257      * @param codes the input character codes
258      * @param word the word being composed as a possible match
259      * @param depth the depth of traversal - the length of the word being composed thus far
260      * @param completion whether the traversal is now in completion mode - meaning that we've
261      * exhausted the input and we're looking for all possible suffixes.
262      * @param snr current weight of the word being formed
263      * @param inputIndex position in the input characters. This can be off from the depth in
264      * case we skip over some punctuations such as apostrophe in the traversal. That is, if you type
265      * "wouldve", it could be matching "would've", so the depth will be one more than the
266      * inputIndex
267      * @param callback the callback class for adding a word
268      */
269     // TODO: Share this routine with the native code for BinaryDictionary
getWordsRec(NodeArray roots, final WordComposer codes, final char[] word, final int depth, final boolean completion, int snr, int inputIndex, int skipPos, WordCallback callback)270     protected void getWordsRec(NodeArray roots, final WordComposer codes, final char[] word,
271             final int depth, final boolean completion, int snr, int inputIndex, int skipPos,
272             WordCallback callback) {
273         final int count = roots.mLength;
274         final int codeSize = mInputLength;
275         // Optimization: Prune out words that are too long compared to how much was typed.
276         if (depth > mMaxDepth) {
277             return;
278         }
279         final int[] currentChars;
280         if (codeSize <= inputIndex) {
281             currentChars = null;
282         } else {
283             currentChars = mCodes[inputIndex];
284         }
285 
286         for (int i = 0; i < count; i++) {
287             final Node node = roots.mData[i];
288             final char c = node.mCode;
289             final char lowerC = toLowerCase(c);
290             final boolean terminal = node.mTerminal;
291             final NodeArray children = node.mChildren;
292             final int freq = node.mFrequency;
293             if (completion || currentChars == null) {
294                 word[depth] = c;
295                 if (terminal) {
296                     final int finalFreq;
297                     if (skipPos < 0) {
298                         finalFreq = freq * snr;
299                     } else {
300                         finalFreq = computeSkippedWordFinalFreq(freq, snr, mInputLength);
301                     }
302                     if (!callback.addWord(word, 0, depth + 1, finalFreq, mDicTypeId,
303                             DataType.UNIGRAM)) {
304                         return;
305                     }
306                 }
307                 if (children != null) {
308                     getWordsRec(children, codes, word, depth + 1, true, snr, inputIndex,
309                             skipPos, callback);
310                 }
311             } else if ((c == Keyboard.CODE_SINGLE_QUOTE
312                     && currentChars[0] != Keyboard.CODE_SINGLE_QUOTE) || depth == skipPos) {
313                 // Skip the ' and continue deeper
314                 word[depth] = c;
315                 if (children != null) {
316                     getWordsRec(children, codes, word, depth + 1, completion, snr, inputIndex,
317                             skipPos, callback);
318                 }
319             } else {
320                 // Don't use alternatives if we're looking for missing characters
321                 final int alternativesSize = skipPos >= 0? 1 : currentChars.length;
322                 for (int j = 0; j < alternativesSize; j++) {
323                     final int addedAttenuation = (j > 0 ? 1 : 2);
324                     final int currentChar = currentChars[j];
325                     if (currentChar == -1) {
326                         break;
327                     }
328                     if (currentChar == lowerC || currentChar == c) {
329                         word[depth] = c;
330 
331                         if (codeSize == inputIndex + 1) {
332                             if (terminal) {
333                                 if (INCLUDE_TYPED_WORD_IF_VALID
334                                         || !same(word, depth + 1, codes.getTypedWord())) {
335                                     final int finalFreq;
336                                     if (skipPos < 0) {
337                                         finalFreq = freq * snr * addedAttenuation
338                                                 * FULL_WORD_SCORE_MULTIPLIER;
339                                     } else {
340                                         finalFreq = computeSkippedWordFinalFreq(freq,
341                                                 snr * addedAttenuation, mInputLength);
342                                     }
343                                     callback.addWord(word, 0, depth + 1, finalFreq, mDicTypeId,
344                                             DataType.UNIGRAM);
345                                 }
346                             }
347                             if (children != null) {
348                                 getWordsRec(children, codes, word, depth + 1,
349                                         true, snr * addedAttenuation, inputIndex + 1,
350                                         skipPos, callback);
351                             }
352                         } else if (children != null) {
353                             getWordsRec(children, codes, word, depth + 1,
354                                     false, snr * addedAttenuation, inputIndex + 1,
355                                     skipPos, callback);
356                         }
357                     }
358                 }
359             }
360         }
361     }
362 
setBigram(String word1, String word2, int frequency)363     protected int setBigram(String word1, String word2, int frequency) {
364         return addOrSetBigram(word1, word2, frequency, false);
365     }
366 
addBigram(String word1, String word2, int frequency)367     protected int addBigram(String word1, String word2, int frequency) {
368         return addOrSetBigram(word1, word2, frequency, true);
369     }
370 
371     /**
372      * Adds bigrams to the in-memory trie structure that is being used to retrieve any word
373      * @param frequency frequency for this bigram
374      * @param addFrequency if true, it adds to current frequency, else it overwrites the old value
375      * @return returns the final frequency
376      */
addOrSetBigram(String word1, String word2, int frequency, boolean addFrequency)377     private int addOrSetBigram(String word1, String word2, int frequency, boolean addFrequency) {
378         // We don't want results to be different according to case of the looked up left hand side
379         // word. We do want however to return the correct case for the right hand side.
380         // So we want to squash the case of the left hand side, and preserve that of the right
381         // hand side word.
382         Node firstWord = searchWord(mRoots, word1.toLowerCase(), 0, null);
383         Node secondWord = searchWord(mRoots, word2, 0, null);
384         LinkedList<NextWord> bigram = firstWord.mNGrams;
385         if (bigram == null || bigram.size() == 0) {
386             firstWord.mNGrams = new LinkedList<NextWord>();
387             bigram = firstWord.mNGrams;
388         } else {
389             for (NextWord nw : bigram) {
390                 if (nw.mWord == secondWord) {
391                     if (addFrequency) {
392                         return nw.addFrequency(frequency);
393                     } else {
394                         return nw.setFrequency(frequency);
395                     }
396                 }
397             }
398         }
399         firstWord.mNGrams.add(new NextWord(secondWord, frequency));
400         return frequency;
401     }
402 
403     /**
404      * Searches for the word and add the word if it does not exist.
405      * @return Returns the terminal node of the word we are searching for.
406      */
searchWord(NodeArray children, String word, int depth, Node parentNode)407     private Node searchWord(NodeArray children, String word, int depth, Node parentNode) {
408         final int wordLength = word.length();
409         final char c = word.charAt(depth);
410         // Does children have the current character?
411         final int childrenLength = children.mLength;
412         Node childNode = null;
413         for (int i = 0; i < childrenLength; i++) {
414             final Node node = children.mData[i];
415             if (node.mCode == c) {
416                 childNode = node;
417                 break;
418             }
419         }
420         if (childNode == null) {
421             childNode = new Node();
422             childNode.mCode = c;
423             childNode.mParent = parentNode;
424             children.add(childNode);
425         }
426         if (wordLength == depth + 1) {
427             // Terminate this word
428             childNode.mTerminal = true;
429             return childNode;
430         }
431         if (childNode.mChildren == null) {
432             childNode.mChildren = new NodeArray();
433         }
434         return searchWord(childNode.mChildren, word, depth + 1, childNode);
435     }
436 
437     // @VisibleForTesting
reloadDictionaryIfRequired()438     boolean reloadDictionaryIfRequired() {
439         synchronized (mUpdatingLock) {
440             // If we need to update, start off a background task
441             if (mRequiresReload) startDictionaryLoadingTaskLocked();
442             // Currently updating contacts, don't return any results.
443             return mUpdatingDictionary;
444         }
445     }
446 
runBigramReverseLookUp(final CharSequence previousWord, final WordCallback callback)447     private void runBigramReverseLookUp(final CharSequence previousWord,
448             final WordCallback callback) {
449         // Search for the lowercase version of the word only, because that's where bigrams
450         // store their sons.
451         Node prevWord = searchNode(mRoots, previousWord.toString().toLowerCase(), 0,
452                 previousWord.length());
453         if (prevWord != null && prevWord.mNGrams != null) {
454             reverseLookUp(prevWord.mNGrams, callback);
455         }
456     }
457 
458     @Override
getBigrams(final WordComposer codes, final CharSequence previousWord, final WordCallback callback)459     public void getBigrams(final WordComposer codes, final CharSequence previousWord,
460             final WordCallback callback) {
461         if (!reloadDictionaryIfRequired()) {
462             runBigramReverseLookUp(previousWord, callback);
463         }
464     }
465 
466     /**
467      * Used for testing purposes and in the spell checker
468      * This function will wait for loading from database to be done
469      */
waitForDictionaryLoading()470     void waitForDictionaryLoading() {
471         while (mUpdatingDictionary) {
472             try {
473                 Thread.sleep(100);
474             } catch (InterruptedException e) {
475                 //
476             }
477         }
478     }
479 
blockingReloadDictionaryIfRequired()480     protected final void blockingReloadDictionaryIfRequired() {
481         reloadDictionaryIfRequired();
482         waitForDictionaryLoading();
483     }
484 
485     // Local to reverseLookUp, but do not allocate each time.
486     private final char[] mLookedUpString = new char[MAX_WORD_LENGTH];
487 
488     /**
489      * reverseLookUp retrieves the full word given a list of terminal nodes and adds those words
490      * through callback.
491      * @param terminalNodes list of terminal nodes we want to add
492      */
reverseLookUp(LinkedList<NextWord> terminalNodes, final WordCallback callback)493     private void reverseLookUp(LinkedList<NextWord> terminalNodes,
494             final WordCallback callback) {
495         Node node;
496         int freq;
497         for (NextWord nextWord : terminalNodes) {
498             node = nextWord.mWord;
499             freq = nextWord.getFrequency();
500             int index = MAX_WORD_LENGTH;
501             do {
502                 --index;
503                 mLookedUpString[index] = node.mCode;
504                 node = node.mParent;
505             } while (node != null);
506 
507             callback.addWord(mLookedUpString, index, MAX_WORD_LENGTH - index, freq, mDicTypeId,
508                     DataType.BIGRAM);
509         }
510     }
511 
512     /**
513      * Recursively search for the terminal node of the word.
514      *
515      * One iteration takes the full word to search for and the current index of the recursion.
516      *
517      * @param children the node of the trie to search under.
518      * @param word the word to search for. Only read [offset..length] so there may be trailing chars
519      * @param offset the index in {@code word} this recursion should operate on.
520      * @param length the length of the input word.
521      * @return Returns the terminal node of the word if the word exists
522      */
searchNode(final NodeArray children, final CharSequence word, final int offset, final int length)523     private Node searchNode(final NodeArray children, final CharSequence word, final int offset,
524             final int length) {
525         final int count = children.mLength;
526         final char currentChar = word.charAt(offset);
527         for (int j = 0; j < count; j++) {
528             final Node node = children.mData[j];
529             if (node.mCode == currentChar) {
530                 if (offset == length - 1) {
531                     if (node.mTerminal) {
532                         return node;
533                     }
534                 } else {
535                     if (node.mChildren != null) {
536                         Node returnNode = searchNode(node.mChildren, word, offset + 1, length);
537                         if (returnNode != null) return returnNode;
538                     }
539                 }
540             }
541         }
542         return null;
543     }
544 
clearDictionary()545     protected void clearDictionary() {
546         mRoots = new NodeArray();
547     }
548 
549     private class LoadDictionaryTask extends Thread {
550         @Override
run()551         public void run() {
552             loadDictionaryAsync();
553             synchronized (mUpdatingLock) {
554                 mUpdatingDictionary = false;
555             }
556         }
557     }
558 
toLowerCase(char c)559     private static char toLowerCase(char c) {
560         char baseChar = c;
561         if (c < BASE_CHARS.length) {
562             baseChar = BASE_CHARS[c];
563         }
564         if (baseChar >= 'A' && baseChar <= 'Z') {
565             return (char)(baseChar | 32);
566         } else if (baseChar > 127) {
567             return Character.toLowerCase(baseChar);
568         }
569         return baseChar;
570     }
571 
572     /**
573      * Table mapping most combined Latin, Greek, and Cyrillic characters
574      * to their base characters.  If c is in range, BASE_CHARS[c] == c
575      * if c is not a combined character, or the base character if it
576      * is combined.
577      */
578     private static final char BASE_CHARS[] = {
579         0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007,
580         0x0008, 0x0009, 0x000a, 0x000b, 0x000c, 0x000d, 0x000e, 0x000f,
581         0x0010, 0x0011, 0x0012, 0x0013, 0x0014, 0x0015, 0x0016, 0x0017,
582         0x0018, 0x0019, 0x001a, 0x001b, 0x001c, 0x001d, 0x001e, 0x001f,
583         0x0020, 0x0021, 0x0022, 0x0023, 0x0024, 0x0025, 0x0026, 0x0027,
584         0x0028, 0x0029, 0x002a, 0x002b, 0x002c, 0x002d, 0x002e, 0x002f,
585         0x0030, 0x0031, 0x0032, 0x0033, 0x0034, 0x0035, 0x0036, 0x0037,
586         0x0038, 0x0039, 0x003a, 0x003b, 0x003c, 0x003d, 0x003e, 0x003f,
587         0x0040, 0x0041, 0x0042, 0x0043, 0x0044, 0x0045, 0x0046, 0x0047,
588         0x0048, 0x0049, 0x004a, 0x004b, 0x004c, 0x004d, 0x004e, 0x004f,
589         0x0050, 0x0051, 0x0052, 0x0053, 0x0054, 0x0055, 0x0056, 0x0057,
590         0x0058, 0x0059, 0x005a, 0x005b, 0x005c, 0x005d, 0x005e, 0x005f,
591         0x0060, 0x0061, 0x0062, 0x0063, 0x0064, 0x0065, 0x0066, 0x0067,
592         0x0068, 0x0069, 0x006a, 0x006b, 0x006c, 0x006d, 0x006e, 0x006f,
593         0x0070, 0x0071, 0x0072, 0x0073, 0x0074, 0x0075, 0x0076, 0x0077,
594         0x0078, 0x0079, 0x007a, 0x007b, 0x007c, 0x007d, 0x007e, 0x007f,
595         0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
596         0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
597         0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
598         0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
599         0x0020, 0x00a1, 0x00a2, 0x00a3, 0x00a4, 0x00a5, 0x00a6, 0x00a7,
600         0x0020, 0x00a9, 0x0061, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x0020,
601         0x00b0, 0x00b1, 0x0032, 0x0033, 0x0020, 0x03bc, 0x00b6, 0x00b7,
602         0x0020, 0x0031, 0x006f, 0x00bb, 0x0031, 0x0031, 0x0033, 0x00bf,
603         0x0041, 0x0041, 0x0041, 0x0041, 0x0041, 0x0041, 0x00c6, 0x0043,
604         0x0045, 0x0045, 0x0045, 0x0045, 0x0049, 0x0049, 0x0049, 0x0049,
605         0x00d0, 0x004e, 0x004f, 0x004f, 0x004f, 0x004f, 0x004f, 0x00d7,
606         0x004f, 0x0055, 0x0055, 0x0055, 0x0055, 0x0059, 0x00de, 0x0073, // Manually changed d8 to 4f
607                                                                         // Manually changed df to 73
608         0x0061, 0x0061, 0x0061, 0x0061, 0x0061, 0x0061, 0x00e6, 0x0063,
609         0x0065, 0x0065, 0x0065, 0x0065, 0x0069, 0x0069, 0x0069, 0x0069,
610         0x00f0, 0x006e, 0x006f, 0x006f, 0x006f, 0x006f, 0x006f, 0x00f7,
611         0x006f, 0x0075, 0x0075, 0x0075, 0x0075, 0x0079, 0x00fe, 0x0079, // Manually changed f8 to 6f
612         0x0041, 0x0061, 0x0041, 0x0061, 0x0041, 0x0061, 0x0043, 0x0063,
613         0x0043, 0x0063, 0x0043, 0x0063, 0x0043, 0x0063, 0x0044, 0x0064,
614         0x0110, 0x0111, 0x0045, 0x0065, 0x0045, 0x0065, 0x0045, 0x0065,
615         0x0045, 0x0065, 0x0045, 0x0065, 0x0047, 0x0067, 0x0047, 0x0067,
616         0x0047, 0x0067, 0x0047, 0x0067, 0x0048, 0x0068, 0x0126, 0x0127,
617         0x0049, 0x0069, 0x0049, 0x0069, 0x0049, 0x0069, 0x0049, 0x0069,
618         0x0049, 0x0131, 0x0049, 0x0069, 0x004a, 0x006a, 0x004b, 0x006b,
619         0x0138, 0x004c, 0x006c, 0x004c, 0x006c, 0x004c, 0x006c, 0x004c,
620         0x006c, 0x0141, 0x0142, 0x004e, 0x006e, 0x004e, 0x006e, 0x004e,
621         0x006e, 0x02bc, 0x014a, 0x014b, 0x004f, 0x006f, 0x004f, 0x006f,
622         0x004f, 0x006f, 0x0152, 0x0153, 0x0052, 0x0072, 0x0052, 0x0072,
623         0x0052, 0x0072, 0x0053, 0x0073, 0x0053, 0x0073, 0x0053, 0x0073,
624         0x0053, 0x0073, 0x0054, 0x0074, 0x0054, 0x0074, 0x0166, 0x0167,
625         0x0055, 0x0075, 0x0055, 0x0075, 0x0055, 0x0075, 0x0055, 0x0075,
626         0x0055, 0x0075, 0x0055, 0x0075, 0x0057, 0x0077, 0x0059, 0x0079,
627         0x0059, 0x005a, 0x007a, 0x005a, 0x007a, 0x005a, 0x007a, 0x0073,
628         0x0180, 0x0181, 0x0182, 0x0183, 0x0184, 0x0185, 0x0186, 0x0187,
629         0x0188, 0x0189, 0x018a, 0x018b, 0x018c, 0x018d, 0x018e, 0x018f,
630         0x0190, 0x0191, 0x0192, 0x0193, 0x0194, 0x0195, 0x0196, 0x0197,
631         0x0198, 0x0199, 0x019a, 0x019b, 0x019c, 0x019d, 0x019e, 0x019f,
632         0x004f, 0x006f, 0x01a2, 0x01a3, 0x01a4, 0x01a5, 0x01a6, 0x01a7,
633         0x01a8, 0x01a9, 0x01aa, 0x01ab, 0x01ac, 0x01ad, 0x01ae, 0x0055,
634         0x0075, 0x01b1, 0x01b2, 0x01b3, 0x01b4, 0x01b5, 0x01b6, 0x01b7,
635         0x01b8, 0x01b9, 0x01ba, 0x01bb, 0x01bc, 0x01bd, 0x01be, 0x01bf,
636         0x01c0, 0x01c1, 0x01c2, 0x01c3, 0x0044, 0x0044, 0x0064, 0x004c,
637         0x004c, 0x006c, 0x004e, 0x004e, 0x006e, 0x0041, 0x0061, 0x0049,
638         0x0069, 0x004f, 0x006f, 0x0055, 0x0075, 0x00dc, 0x00fc, 0x00dc,
639         0x00fc, 0x00dc, 0x00fc, 0x00dc, 0x00fc, 0x01dd, 0x00c4, 0x00e4,
640         0x0226, 0x0227, 0x00c6, 0x00e6, 0x01e4, 0x01e5, 0x0047, 0x0067,
641         0x004b, 0x006b, 0x004f, 0x006f, 0x01ea, 0x01eb, 0x01b7, 0x0292,
642         0x006a, 0x0044, 0x0044, 0x0064, 0x0047, 0x0067, 0x01f6, 0x01f7,
643         0x004e, 0x006e, 0x00c5, 0x00e5, 0x00c6, 0x00e6, 0x00d8, 0x00f8,
644         0x0041, 0x0061, 0x0041, 0x0061, 0x0045, 0x0065, 0x0045, 0x0065,
645         0x0049, 0x0069, 0x0049, 0x0069, 0x004f, 0x006f, 0x004f, 0x006f,
646         0x0052, 0x0072, 0x0052, 0x0072, 0x0055, 0x0075, 0x0055, 0x0075,
647         0x0053, 0x0073, 0x0054, 0x0074, 0x021c, 0x021d, 0x0048, 0x0068,
648         0x0220, 0x0221, 0x0222, 0x0223, 0x0224, 0x0225, 0x0041, 0x0061,
649         0x0045, 0x0065, 0x00d6, 0x00f6, 0x00d5, 0x00f5, 0x004f, 0x006f,
650         0x022e, 0x022f, 0x0059, 0x0079, 0x0234, 0x0235, 0x0236, 0x0237,
651         0x0238, 0x0239, 0x023a, 0x023b, 0x023c, 0x023d, 0x023e, 0x023f,
652         0x0240, 0x0241, 0x0242, 0x0243, 0x0244, 0x0245, 0x0246, 0x0247,
653         0x0248, 0x0249, 0x024a, 0x024b, 0x024c, 0x024d, 0x024e, 0x024f,
654         0x0250, 0x0251, 0x0252, 0x0253, 0x0254, 0x0255, 0x0256, 0x0257,
655         0x0258, 0x0259, 0x025a, 0x025b, 0x025c, 0x025d, 0x025e, 0x025f,
656         0x0260, 0x0261, 0x0262, 0x0263, 0x0264, 0x0265, 0x0266, 0x0267,
657         0x0268, 0x0269, 0x026a, 0x026b, 0x026c, 0x026d, 0x026e, 0x026f,
658         0x0270, 0x0271, 0x0272, 0x0273, 0x0274, 0x0275, 0x0276, 0x0277,
659         0x0278, 0x0279, 0x027a, 0x027b, 0x027c, 0x027d, 0x027e, 0x027f,
660         0x0280, 0x0281, 0x0282, 0x0283, 0x0284, 0x0285, 0x0286, 0x0287,
661         0x0288, 0x0289, 0x028a, 0x028b, 0x028c, 0x028d, 0x028e, 0x028f,
662         0x0290, 0x0291, 0x0292, 0x0293, 0x0294, 0x0295, 0x0296, 0x0297,
663         0x0298, 0x0299, 0x029a, 0x029b, 0x029c, 0x029d, 0x029e, 0x029f,
664         0x02a0, 0x02a1, 0x02a2, 0x02a3, 0x02a4, 0x02a5, 0x02a6, 0x02a7,
665         0x02a8, 0x02a9, 0x02aa, 0x02ab, 0x02ac, 0x02ad, 0x02ae, 0x02af,
666         0x0068, 0x0266, 0x006a, 0x0072, 0x0279, 0x027b, 0x0281, 0x0077,
667         0x0079, 0x02b9, 0x02ba, 0x02bb, 0x02bc, 0x02bd, 0x02be, 0x02bf,
668         0x02c0, 0x02c1, 0x02c2, 0x02c3, 0x02c4, 0x02c5, 0x02c6, 0x02c7,
669         0x02c8, 0x02c9, 0x02ca, 0x02cb, 0x02cc, 0x02cd, 0x02ce, 0x02cf,
670         0x02d0, 0x02d1, 0x02d2, 0x02d3, 0x02d4, 0x02d5, 0x02d6, 0x02d7,
671         0x0020, 0x0020, 0x0020, 0x0020, 0x0020, 0x0020, 0x02de, 0x02df,
672         0x0263, 0x006c, 0x0073, 0x0078, 0x0295, 0x02e5, 0x02e6, 0x02e7,
673         0x02e8, 0x02e9, 0x02ea, 0x02eb, 0x02ec, 0x02ed, 0x02ee, 0x02ef,
674         0x02f0, 0x02f1, 0x02f2, 0x02f3, 0x02f4, 0x02f5, 0x02f6, 0x02f7,
675         0x02f8, 0x02f9, 0x02fa, 0x02fb, 0x02fc, 0x02fd, 0x02fe, 0x02ff,
676         0x0300, 0x0301, 0x0302, 0x0303, 0x0304, 0x0305, 0x0306, 0x0307,
677         0x0308, 0x0309, 0x030a, 0x030b, 0x030c, 0x030d, 0x030e, 0x030f,
678         0x0310, 0x0311, 0x0312, 0x0313, 0x0314, 0x0315, 0x0316, 0x0317,
679         0x0318, 0x0319, 0x031a, 0x031b, 0x031c, 0x031d, 0x031e, 0x031f,
680         0x0320, 0x0321, 0x0322, 0x0323, 0x0324, 0x0325, 0x0326, 0x0327,
681         0x0328, 0x0329, 0x032a, 0x032b, 0x032c, 0x032d, 0x032e, 0x032f,
682         0x0330, 0x0331, 0x0332, 0x0333, 0x0334, 0x0335, 0x0336, 0x0337,
683         0x0338, 0x0339, 0x033a, 0x033b, 0x033c, 0x033d, 0x033e, 0x033f,
684         0x0300, 0x0301, 0x0342, 0x0313, 0x0308, 0x0345, 0x0346, 0x0347,
685         0x0348, 0x0349, 0x034a, 0x034b, 0x034c, 0x034d, 0x034e, 0x034f,
686         0x0350, 0x0351, 0x0352, 0x0353, 0x0354, 0x0355, 0x0356, 0x0357,
687         0x0358, 0x0359, 0x035a, 0x035b, 0x035c, 0x035d, 0x035e, 0x035f,
688         0x0360, 0x0361, 0x0362, 0x0363, 0x0364, 0x0365, 0x0366, 0x0367,
689         0x0368, 0x0369, 0x036a, 0x036b, 0x036c, 0x036d, 0x036e, 0x036f,
690         0x0370, 0x0371, 0x0372, 0x0373, 0x02b9, 0x0375, 0x0376, 0x0377,
691         0x0378, 0x0379, 0x0020, 0x037b, 0x037c, 0x037d, 0x003b, 0x037f,
692         0x0380, 0x0381, 0x0382, 0x0383, 0x0020, 0x00a8, 0x0391, 0x00b7,
693         0x0395, 0x0397, 0x0399, 0x038b, 0x039f, 0x038d, 0x03a5, 0x03a9,
694         0x03ca, 0x0391, 0x0392, 0x0393, 0x0394, 0x0395, 0x0396, 0x0397,
695         0x0398, 0x0399, 0x039a, 0x039b, 0x039c, 0x039d, 0x039e, 0x039f,
696         0x03a0, 0x03a1, 0x03a2, 0x03a3, 0x03a4, 0x03a5, 0x03a6, 0x03a7,
697         0x03a8, 0x03a9, 0x0399, 0x03a5, 0x03b1, 0x03b5, 0x03b7, 0x03b9,
698         0x03cb, 0x03b1, 0x03b2, 0x03b3, 0x03b4, 0x03b5, 0x03b6, 0x03b7,
699         0x03b8, 0x03b9, 0x03ba, 0x03bb, 0x03bc, 0x03bd, 0x03be, 0x03bf,
700         0x03c0, 0x03c1, 0x03c2, 0x03c3, 0x03c4, 0x03c5, 0x03c6, 0x03c7,
701         0x03c8, 0x03c9, 0x03b9, 0x03c5, 0x03bf, 0x03c5, 0x03c9, 0x03cf,
702         0x03b2, 0x03b8, 0x03a5, 0x03d2, 0x03d2, 0x03c6, 0x03c0, 0x03d7,
703         0x03d8, 0x03d9, 0x03da, 0x03db, 0x03dc, 0x03dd, 0x03de, 0x03df,
704         0x03e0, 0x03e1, 0x03e2, 0x03e3, 0x03e4, 0x03e5, 0x03e6, 0x03e7,
705         0x03e8, 0x03e9, 0x03ea, 0x03eb, 0x03ec, 0x03ed, 0x03ee, 0x03ef,
706         0x03ba, 0x03c1, 0x03c2, 0x03f3, 0x0398, 0x03b5, 0x03f6, 0x03f7,
707         0x03f8, 0x03a3, 0x03fa, 0x03fb, 0x03fc, 0x03fd, 0x03fe, 0x03ff,
708         0x0415, 0x0415, 0x0402, 0x0413, 0x0404, 0x0405, 0x0406, 0x0406,
709         0x0408, 0x0409, 0x040a, 0x040b, 0x041a, 0x0418, 0x0423, 0x040f,
710         0x0410, 0x0411, 0x0412, 0x0413, 0x0414, 0x0415, 0x0416, 0x0417,
711         0x0418, 0x0418, 0x041a, 0x041b, 0x041c, 0x041d, 0x041e, 0x041f,
712         0x0420, 0x0421, 0x0422, 0x0423, 0x0424, 0x0425, 0x0426, 0x0427,
713         0x0428, 0x0429, 0x042a, 0x042b, 0x042c, 0x042d, 0x042e, 0x042f,
714         0x0430, 0x0431, 0x0432, 0x0433, 0x0434, 0x0435, 0x0436, 0x0437,
715         0x0438, 0x0438, 0x043a, 0x043b, 0x043c, 0x043d, 0x043e, 0x043f,
716         0x0440, 0x0441, 0x0442, 0x0443, 0x0444, 0x0445, 0x0446, 0x0447,
717         0x0448, 0x0449, 0x044a, 0x044b, 0x044c, 0x044d, 0x044e, 0x044f,
718         0x0435, 0x0435, 0x0452, 0x0433, 0x0454, 0x0455, 0x0456, 0x0456,
719         0x0458, 0x0459, 0x045a, 0x045b, 0x043a, 0x0438, 0x0443, 0x045f,
720         0x0460, 0x0461, 0x0462, 0x0463, 0x0464, 0x0465, 0x0466, 0x0467,
721         0x0468, 0x0469, 0x046a, 0x046b, 0x046c, 0x046d, 0x046e, 0x046f,
722         0x0470, 0x0471, 0x0472, 0x0473, 0x0474, 0x0475, 0x0474, 0x0475,
723         0x0478, 0x0479, 0x047a, 0x047b, 0x047c, 0x047d, 0x047e, 0x047f,
724         0x0480, 0x0481, 0x0482, 0x0483, 0x0484, 0x0485, 0x0486, 0x0487,
725         0x0488, 0x0489, 0x048a, 0x048b, 0x048c, 0x048d, 0x048e, 0x048f,
726         0x0490, 0x0491, 0x0492, 0x0493, 0x0494, 0x0495, 0x0496, 0x0497,
727         0x0498, 0x0499, 0x049a, 0x049b, 0x049c, 0x049d, 0x049e, 0x049f,
728         0x04a0, 0x04a1, 0x04a2, 0x04a3, 0x04a4, 0x04a5, 0x04a6, 0x04a7,
729         0x04a8, 0x04a9, 0x04aa, 0x04ab, 0x04ac, 0x04ad, 0x04ae, 0x04af,
730         0x04b0, 0x04b1, 0x04b2, 0x04b3, 0x04b4, 0x04b5, 0x04b6, 0x04b7,
731         0x04b8, 0x04b9, 0x04ba, 0x04bb, 0x04bc, 0x04bd, 0x04be, 0x04bf,
732         0x04c0, 0x0416, 0x0436, 0x04c3, 0x04c4, 0x04c5, 0x04c6, 0x04c7,
733         0x04c8, 0x04c9, 0x04ca, 0x04cb, 0x04cc, 0x04cd, 0x04ce, 0x04cf,
734         0x0410, 0x0430, 0x0410, 0x0430, 0x04d4, 0x04d5, 0x0415, 0x0435,
735         0x04d8, 0x04d9, 0x04d8, 0x04d9, 0x0416, 0x0436, 0x0417, 0x0437,
736         0x04e0, 0x04e1, 0x0418, 0x0438, 0x0418, 0x0438, 0x041e, 0x043e,
737         0x04e8, 0x04e9, 0x04e8, 0x04e9, 0x042d, 0x044d, 0x0423, 0x0443,
738         0x0423, 0x0443, 0x0423, 0x0443, 0x0427, 0x0447, 0x04f6, 0x04f7,
739         0x042b, 0x044b, 0x04fa, 0x04fb, 0x04fc, 0x04fd, 0x04fe, 0x04ff,
740     };
741 
742     // generated with:
743     // cat UnicodeData.txt | perl -e 'while (<>) { @foo = split(/;/); $foo[5] =~ s/<.*> //; $base[hex($foo[0])] = hex($foo[5]);} for ($i = 0; $i < 0x500; $i += 8) { for ($j = $i; $j < $i + 8; $j++) { printf("0x%04x, ", $base[$j] ? $base[$j] : $j)}; print "\n"; }'
744 
745 }
746