1 /* 2 * Copyright (C) 2009 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 package com.android.inputmethod.latin; 18 19 import android.content.Context; 20 21 /** 22 * Base class for an in-memory dictionary that can grow dynamically and can 23 * be searched for suggestions and valid words. 24 */ 25 public class ExpandableDictionary extends Dictionary { 26 private Context mContext; 27 private char[] mWordBuilder = new char[MAX_WORD_LENGTH]; 28 private int mMaxDepth; 29 private int mInputLength; 30 31 public static final int MAX_WORD_LENGTH = 32; 32 private static final char QUOTE = '\''; 33 34 static class Node { 35 char code; 36 int frequency; 37 boolean terminal; 38 NodeArray children; 39 } 40 41 static class NodeArray { 42 Node[] data; 43 int length = 0; 44 private static final int INCREMENT = 2; 45 NodeArray()46 NodeArray() { 47 data = new Node[INCREMENT]; 48 } 49 add(Node n)50 void add(Node n) { 51 if (length + 1 > data.length) { 52 Node[] tempData = new Node[length + INCREMENT]; 53 if (length > 0) { 54 System.arraycopy(data, 0, tempData, 0, length); 55 } 56 data = tempData; 57 } 58 data[length++] = n; 59 } 60 } 61 62 private NodeArray mRoots; 63 64 private int[][] mCodes; 65 ExpandableDictionary(Context context)66 ExpandableDictionary(Context context) { 67 mContext = context; 68 clearDictionary(); 69 mCodes = new int[MAX_WORD_LENGTH][]; 70 } 71 getContext()72 Context getContext() { 73 return mContext; 74 } 75 getMaxWordLength()76 int getMaxWordLength() { 77 return MAX_WORD_LENGTH; 78 } 79 addWord(String word, int frequency)80 public void addWord(String word, int frequency) { 81 addWordRec(mRoots, word, 0, frequency); 82 } 83 addWordRec(NodeArray children, final String word, final int depth, final int frequency)84 private void addWordRec(NodeArray children, final String word, 85 final int depth, final int frequency) { 86 87 final int wordLength = word.length(); 88 final char c = word.charAt(depth); 89 // Does children have the current character? 90 final int childrenLength = children.length; 91 Node childNode = null; 92 boolean found = false; 93 for (int i = 0; i < childrenLength; i++) { 94 childNode = children.data[i]; 95 if (childNode.code == c) { 96 found = true; 97 break; 98 } 99 } 100 if (!found) { 101 childNode = new Node(); 102 childNode.code = c; 103 children.add(childNode); 104 } 105 if (wordLength == depth + 1) { 106 // Terminate this word 107 childNode.terminal = true; 108 childNode.frequency += frequency; // If there are multiple similar words 109 if (childNode.frequency > 256) childNode.frequency = 256; 110 return; 111 } 112 if (childNode.children == null) { 113 childNode.children = new NodeArray(); 114 } 115 addWordRec(childNode.children, word, depth + 1, frequency); 116 } 117 118 @Override getWords(final WordComposer codes, final WordCallback callback)119 public void getWords(final WordComposer codes, final WordCallback callback) { 120 mInputLength = codes.size(); 121 if (mCodes.length < mInputLength) mCodes = new int[mInputLength][]; 122 // Cache the codes so that we don't have to lookup an array list 123 for (int i = 0; i < mInputLength; i++) { 124 mCodes[i] = codes.getCodesAt(i); 125 } 126 mMaxDepth = mInputLength * 3; 127 getWordsRec(mRoots, codes, mWordBuilder, 0, false, 1, 0, -1, callback); 128 for (int i = 0; i < mInputLength; i++) { 129 getWordsRec(mRoots, codes, mWordBuilder, 0, false, 1, 0, i, callback); 130 } 131 } 132 133 @Override isValidWord(CharSequence word)134 public synchronized boolean isValidWord(CharSequence word) { 135 final int freq = getWordFrequencyRec(mRoots, word, 0, word.length()); 136 return freq > -1; 137 } 138 139 /** 140 * Returns the word's frequency or -1 if not found 141 */ getWordFrequency(CharSequence word)142 public int getWordFrequency(CharSequence word) { 143 return getWordFrequencyRec(mRoots, word, 0, word.length()); 144 } 145 146 /** 147 * Returns the word's frequency or -1 if not found 148 */ getWordFrequencyRec(final NodeArray children, final CharSequence word, final int offset, final int length)149 private int getWordFrequencyRec(final NodeArray children, final CharSequence word, 150 final int offset, final int length) { 151 final int count = children.length; 152 char currentChar = word.charAt(offset); 153 for (int j = 0; j < count; j++) { 154 final Node node = children.data[j]; 155 if (node.code == currentChar) { 156 if (offset == length - 1) { 157 if (node.terminal) { 158 return node.frequency; 159 } 160 } else { 161 if (node.children != null) { 162 int freq = getWordFrequencyRec(node.children, word, offset + 1, length); 163 if (freq > -1) return freq; 164 } 165 } 166 } 167 } 168 return -1; 169 } 170 171 /** 172 * Recursively traverse the tree for words that match the input. Input consists of 173 * a list of arrays. Each item in the list is one input character position. An input 174 * character is actually an array of multiple possible candidates. This function is not 175 * optimized for speed, assuming that the user dictionary will only be a few hundred words in 176 * size. 177 * @param roots node whose children have to be search for matches 178 * @param codes the input character codes 179 * @param word the word being composed as a possible match 180 * @param depth the depth of traversal - the length of the word being composed thus far 181 * @param completion whether the traversal is now in completion mode - meaning that we've 182 * exhausted the input and we're looking for all possible suffixes. 183 * @param snr current weight of the word being formed 184 * @param inputIndex position in the input characters. This can be off from the depth in 185 * case we skip over some punctuations such as apostrophe in the traversal. That is, if you type 186 * "wouldve", it could be matching "would've", so the depth will be one more than the 187 * inputIndex 188 * @param callback the callback class for adding a word 189 */ getWordsRec(NodeArray roots, final WordComposer codes, final char[] word, final int depth, boolean completion, int snr, int inputIndex, int skipPos, WordCallback callback)190 protected void getWordsRec(NodeArray roots, final WordComposer codes, final char[] word, 191 final int depth, boolean completion, int snr, int inputIndex, int skipPos, 192 WordCallback callback) { 193 final int count = roots.length; 194 final int codeSize = mInputLength; 195 // Optimization: Prune out words that are too long compared to how much was typed. 196 if (depth > mMaxDepth) { 197 return; 198 } 199 int[] currentChars = null; 200 if (codeSize <= inputIndex) { 201 completion = true; 202 } else { 203 currentChars = mCodes[inputIndex]; 204 } 205 206 for (int i = 0; i < count; i++) { 207 final Node node = roots.data[i]; 208 final char c = node.code; 209 final char lowerC = toLowerCase(c); 210 final boolean terminal = node.terminal; 211 final NodeArray children = node.children; 212 final int freq = node.frequency; 213 if (completion) { 214 word[depth] = c; 215 if (terminal) { 216 if (!callback.addWord(word, 0, depth + 1, freq * snr)) { 217 return; 218 } 219 } 220 if (children != null) { 221 getWordsRec(children, codes, word, depth + 1, completion, snr, inputIndex, 222 skipPos, callback); 223 } 224 } else if ((c == QUOTE && currentChars[0] != QUOTE) || depth == skipPos) { 225 // Skip the ' and continue deeper 226 word[depth] = c; 227 if (children != null) { 228 getWordsRec(children, codes, word, depth + 1, completion, snr, inputIndex, 229 skipPos, callback); 230 } 231 } else { 232 // Don't use alternatives if we're looking for missing characters 233 final int alternativesSize = skipPos >= 0? 1 : currentChars.length; 234 for (int j = 0; j < alternativesSize; j++) { 235 final int addedAttenuation = (j > 0 ? 1 : 2); 236 final int currentChar = currentChars[j]; 237 if (currentChar == -1) { 238 break; 239 } 240 if (currentChar == lowerC || currentChar == c) { 241 word[depth] = c; 242 243 if (codeSize == depth + 1) { 244 if (terminal) { 245 if (INCLUDE_TYPED_WORD_IF_VALID 246 || !same(word, depth + 1, codes.getTypedWord())) { 247 int finalFreq = freq * snr * addedAttenuation; 248 if (skipPos < 0) finalFreq *= FULL_WORD_FREQ_MULTIPLIER; 249 callback.addWord(word, 0, depth + 1, finalFreq); 250 } 251 } 252 if (children != null) { 253 getWordsRec(children, codes, word, depth + 1, 254 true, snr * addedAttenuation, inputIndex + 1, 255 skipPos, callback); 256 } 257 } else if (children != null) { 258 getWordsRec(children, codes, word, depth + 1, 259 false, snr * addedAttenuation, inputIndex + 1, 260 skipPos, callback); 261 } 262 } 263 } 264 } 265 } 266 } 267 clearDictionary()268 protected void clearDictionary() { 269 mRoots = new NodeArray(); 270 } 271 toLowerCase(char c)272 static char toLowerCase(char c) { 273 if (c < BASE_CHARS.length) { 274 c = BASE_CHARS[c]; 275 } 276 if (c >= 'A' && c <= 'Z') { 277 c = (char) (c | 32); 278 } else if (c > 127) { 279 c = Character.toLowerCase(c); 280 } 281 return c; 282 } 283 284 /** 285 * Table mapping most combined Latin, Greek, and Cyrillic characters 286 * to their base characters. If c is in range, BASE_CHARS[c] == c 287 * if c is not a combined character, or the base character if it 288 * is combined. 289 */ 290 static final char BASE_CHARS[] = { 291 0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007, 292 0x0008, 0x0009, 0x000a, 0x000b, 0x000c, 0x000d, 0x000e, 0x000f, 293 0x0010, 0x0011, 0x0012, 0x0013, 0x0014, 0x0015, 0x0016, 0x0017, 294 0x0018, 0x0019, 0x001a, 0x001b, 0x001c, 0x001d, 0x001e, 0x001f, 295 0x0020, 0x0021, 0x0022, 0x0023, 0x0024, 0x0025, 0x0026, 0x0027, 296 0x0028, 0x0029, 0x002a, 0x002b, 0x002c, 0x002d, 0x002e, 0x002f, 297 0x0030, 0x0031, 0x0032, 0x0033, 0x0034, 0x0035, 0x0036, 0x0037, 298 0x0038, 0x0039, 0x003a, 0x003b, 0x003c, 0x003d, 0x003e, 0x003f, 299 0x0040, 0x0041, 0x0042, 0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 300 0x0048, 0x0049, 0x004a, 0x004b, 0x004c, 0x004d, 0x004e, 0x004f, 301 0x0050, 0x0051, 0x0052, 0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 302 0x0058, 0x0059, 0x005a, 0x005b, 0x005c, 0x005d, 0x005e, 0x005f, 303 0x0060, 0x0061, 0x0062, 0x0063, 0x0064, 0x0065, 0x0066, 0x0067, 304 0x0068, 0x0069, 0x006a, 0x006b, 0x006c, 0x006d, 0x006e, 0x006f, 305 0x0070, 0x0071, 0x0072, 0x0073, 0x0074, 0x0075, 0x0076, 0x0077, 306 0x0078, 0x0079, 0x007a, 0x007b, 0x007c, 0x007d, 0x007e, 0x007f, 307 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087, 308 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f, 309 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097, 310 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f, 311 0x0020, 0x00a1, 0x00a2, 0x00a3, 0x00a4, 0x00a5, 0x00a6, 0x00a7, 312 0x0020, 0x00a9, 0x0061, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x0020, 313 0x00b0, 0x00b1, 0x0032, 0x0033, 0x0020, 0x03bc, 0x00b6, 0x00b7, 314 0x0020, 0x0031, 0x006f, 0x00bb, 0x0031, 0x0031, 0x0033, 0x00bf, 315 0x0041, 0x0041, 0x0041, 0x0041, 0x0041, 0x0041, 0x00c6, 0x0043, 316 0x0045, 0x0045, 0x0045, 0x0045, 0x0049, 0x0049, 0x0049, 0x0049, 317 0x00d0, 0x004e, 0x004f, 0x004f, 0x004f, 0x004f, 0x004f, 0x00d7, 318 0x004f, 0x0055, 0x0055, 0x0055, 0x0055, 0x0059, 0x00de, 0x0073, // Manually changed d8 to 4f 319 // Manually changed df to 73 320 0x0061, 0x0061, 0x0061, 0x0061, 0x0061, 0x0061, 0x00e6, 0x0063, 321 0x0065, 0x0065, 0x0065, 0x0065, 0x0069, 0x0069, 0x0069, 0x0069, 322 0x00f0, 0x006e, 0x006f, 0x006f, 0x006f, 0x006f, 0x006f, 0x00f7, 323 0x006f, 0x0075, 0x0075, 0x0075, 0x0075, 0x0079, 0x00fe, 0x0079, // Manually changed f8 to 6f 324 0x0041, 0x0061, 0x0041, 0x0061, 0x0041, 0x0061, 0x0043, 0x0063, 325 0x0043, 0x0063, 0x0043, 0x0063, 0x0043, 0x0063, 0x0044, 0x0064, 326 0x0110, 0x0111, 0x0045, 0x0065, 0x0045, 0x0065, 0x0045, 0x0065, 327 0x0045, 0x0065, 0x0045, 0x0065, 0x0047, 0x0067, 0x0047, 0x0067, 328 0x0047, 0x0067, 0x0047, 0x0067, 0x0048, 0x0068, 0x0126, 0x0127, 329 0x0049, 0x0069, 0x0049, 0x0069, 0x0049, 0x0069, 0x0049, 0x0069, 330 0x0049, 0x0131, 0x0049, 0x0069, 0x004a, 0x006a, 0x004b, 0x006b, 331 0x0138, 0x004c, 0x006c, 0x004c, 0x006c, 0x004c, 0x006c, 0x004c, 332 0x006c, 0x0141, 0x0142, 0x004e, 0x006e, 0x004e, 0x006e, 0x004e, 333 0x006e, 0x02bc, 0x014a, 0x014b, 0x004f, 0x006f, 0x004f, 0x006f, 334 0x004f, 0x006f, 0x0152, 0x0153, 0x0052, 0x0072, 0x0052, 0x0072, 335 0x0052, 0x0072, 0x0053, 0x0073, 0x0053, 0x0073, 0x0053, 0x0073, 336 0x0053, 0x0073, 0x0054, 0x0074, 0x0054, 0x0074, 0x0166, 0x0167, 337 0x0055, 0x0075, 0x0055, 0x0075, 0x0055, 0x0075, 0x0055, 0x0075, 338 0x0055, 0x0075, 0x0055, 0x0075, 0x0057, 0x0077, 0x0059, 0x0079, 339 0x0059, 0x005a, 0x007a, 0x005a, 0x007a, 0x005a, 0x007a, 0x0073, 340 0x0180, 0x0181, 0x0182, 0x0183, 0x0184, 0x0185, 0x0186, 0x0187, 341 0x0188, 0x0189, 0x018a, 0x018b, 0x018c, 0x018d, 0x018e, 0x018f, 342 0x0190, 0x0191, 0x0192, 0x0193, 0x0194, 0x0195, 0x0196, 0x0197, 343 0x0198, 0x0199, 0x019a, 0x019b, 0x019c, 0x019d, 0x019e, 0x019f, 344 0x004f, 0x006f, 0x01a2, 0x01a3, 0x01a4, 0x01a5, 0x01a6, 0x01a7, 345 0x01a8, 0x01a9, 0x01aa, 0x01ab, 0x01ac, 0x01ad, 0x01ae, 0x0055, 346 0x0075, 0x01b1, 0x01b2, 0x01b3, 0x01b4, 0x01b5, 0x01b6, 0x01b7, 347 0x01b8, 0x01b9, 0x01ba, 0x01bb, 0x01bc, 0x01bd, 0x01be, 0x01bf, 348 0x01c0, 0x01c1, 0x01c2, 0x01c3, 0x0044, 0x0044, 0x0064, 0x004c, 349 0x004c, 0x006c, 0x004e, 0x004e, 0x006e, 0x0041, 0x0061, 0x0049, 350 0x0069, 0x004f, 0x006f, 0x0055, 0x0075, 0x00dc, 0x00fc, 0x00dc, 351 0x00fc, 0x00dc, 0x00fc, 0x00dc, 0x00fc, 0x01dd, 0x00c4, 0x00e4, 352 0x0226, 0x0227, 0x00c6, 0x00e6, 0x01e4, 0x01e5, 0x0047, 0x0067, 353 0x004b, 0x006b, 0x004f, 0x006f, 0x01ea, 0x01eb, 0x01b7, 0x0292, 354 0x006a, 0x0044, 0x0044, 0x0064, 0x0047, 0x0067, 0x01f6, 0x01f7, 355 0x004e, 0x006e, 0x00c5, 0x00e5, 0x00c6, 0x00e6, 0x00d8, 0x00f8, 356 0x0041, 0x0061, 0x0041, 0x0061, 0x0045, 0x0065, 0x0045, 0x0065, 357 0x0049, 0x0069, 0x0049, 0x0069, 0x004f, 0x006f, 0x004f, 0x006f, 358 0x0052, 0x0072, 0x0052, 0x0072, 0x0055, 0x0075, 0x0055, 0x0075, 359 0x0053, 0x0073, 0x0054, 0x0074, 0x021c, 0x021d, 0x0048, 0x0068, 360 0x0220, 0x0221, 0x0222, 0x0223, 0x0224, 0x0225, 0x0041, 0x0061, 361 0x0045, 0x0065, 0x00d6, 0x00f6, 0x00d5, 0x00f5, 0x004f, 0x006f, 362 0x022e, 0x022f, 0x0059, 0x0079, 0x0234, 0x0235, 0x0236, 0x0237, 363 0x0238, 0x0239, 0x023a, 0x023b, 0x023c, 0x023d, 0x023e, 0x023f, 364 0x0240, 0x0241, 0x0242, 0x0243, 0x0244, 0x0245, 0x0246, 0x0247, 365 0x0248, 0x0249, 0x024a, 0x024b, 0x024c, 0x024d, 0x024e, 0x024f, 366 0x0250, 0x0251, 0x0252, 0x0253, 0x0254, 0x0255, 0x0256, 0x0257, 367 0x0258, 0x0259, 0x025a, 0x025b, 0x025c, 0x025d, 0x025e, 0x025f, 368 0x0260, 0x0261, 0x0262, 0x0263, 0x0264, 0x0265, 0x0266, 0x0267, 369 0x0268, 0x0269, 0x026a, 0x026b, 0x026c, 0x026d, 0x026e, 0x026f, 370 0x0270, 0x0271, 0x0272, 0x0273, 0x0274, 0x0275, 0x0276, 0x0277, 371 0x0278, 0x0279, 0x027a, 0x027b, 0x027c, 0x027d, 0x027e, 0x027f, 372 0x0280, 0x0281, 0x0282, 0x0283, 0x0284, 0x0285, 0x0286, 0x0287, 373 0x0288, 0x0289, 0x028a, 0x028b, 0x028c, 0x028d, 0x028e, 0x028f, 374 0x0290, 0x0291, 0x0292, 0x0293, 0x0294, 0x0295, 0x0296, 0x0297, 375 0x0298, 0x0299, 0x029a, 0x029b, 0x029c, 0x029d, 0x029e, 0x029f, 376 0x02a0, 0x02a1, 0x02a2, 0x02a3, 0x02a4, 0x02a5, 0x02a6, 0x02a7, 377 0x02a8, 0x02a9, 0x02aa, 0x02ab, 0x02ac, 0x02ad, 0x02ae, 0x02af, 378 0x0068, 0x0266, 0x006a, 0x0072, 0x0279, 0x027b, 0x0281, 0x0077, 379 0x0079, 0x02b9, 0x02ba, 0x02bb, 0x02bc, 0x02bd, 0x02be, 0x02bf, 380 0x02c0, 0x02c1, 0x02c2, 0x02c3, 0x02c4, 0x02c5, 0x02c6, 0x02c7, 381 0x02c8, 0x02c9, 0x02ca, 0x02cb, 0x02cc, 0x02cd, 0x02ce, 0x02cf, 382 0x02d0, 0x02d1, 0x02d2, 0x02d3, 0x02d4, 0x02d5, 0x02d6, 0x02d7, 383 0x0020, 0x0020, 0x0020, 0x0020, 0x0020, 0x0020, 0x02de, 0x02df, 384 0x0263, 0x006c, 0x0073, 0x0078, 0x0295, 0x02e5, 0x02e6, 0x02e7, 385 0x02e8, 0x02e9, 0x02ea, 0x02eb, 0x02ec, 0x02ed, 0x02ee, 0x02ef, 386 0x02f0, 0x02f1, 0x02f2, 0x02f3, 0x02f4, 0x02f5, 0x02f6, 0x02f7, 387 0x02f8, 0x02f9, 0x02fa, 0x02fb, 0x02fc, 0x02fd, 0x02fe, 0x02ff, 388 0x0300, 0x0301, 0x0302, 0x0303, 0x0304, 0x0305, 0x0306, 0x0307, 389 0x0308, 0x0309, 0x030a, 0x030b, 0x030c, 0x030d, 0x030e, 0x030f, 390 0x0310, 0x0311, 0x0312, 0x0313, 0x0314, 0x0315, 0x0316, 0x0317, 391 0x0318, 0x0319, 0x031a, 0x031b, 0x031c, 0x031d, 0x031e, 0x031f, 392 0x0320, 0x0321, 0x0322, 0x0323, 0x0324, 0x0325, 0x0326, 0x0327, 393 0x0328, 0x0329, 0x032a, 0x032b, 0x032c, 0x032d, 0x032e, 0x032f, 394 0x0330, 0x0331, 0x0332, 0x0333, 0x0334, 0x0335, 0x0336, 0x0337, 395 0x0338, 0x0339, 0x033a, 0x033b, 0x033c, 0x033d, 0x033e, 0x033f, 396 0x0300, 0x0301, 0x0342, 0x0313, 0x0308, 0x0345, 0x0346, 0x0347, 397 0x0348, 0x0349, 0x034a, 0x034b, 0x034c, 0x034d, 0x034e, 0x034f, 398 0x0350, 0x0351, 0x0352, 0x0353, 0x0354, 0x0355, 0x0356, 0x0357, 399 0x0358, 0x0359, 0x035a, 0x035b, 0x035c, 0x035d, 0x035e, 0x035f, 400 0x0360, 0x0361, 0x0362, 0x0363, 0x0364, 0x0365, 0x0366, 0x0367, 401 0x0368, 0x0369, 0x036a, 0x036b, 0x036c, 0x036d, 0x036e, 0x036f, 402 0x0370, 0x0371, 0x0372, 0x0373, 0x02b9, 0x0375, 0x0376, 0x0377, 403 0x0378, 0x0379, 0x0020, 0x037b, 0x037c, 0x037d, 0x003b, 0x037f, 404 0x0380, 0x0381, 0x0382, 0x0383, 0x0020, 0x00a8, 0x0391, 0x00b7, 405 0x0395, 0x0397, 0x0399, 0x038b, 0x039f, 0x038d, 0x03a5, 0x03a9, 406 0x03ca, 0x0391, 0x0392, 0x0393, 0x0394, 0x0395, 0x0396, 0x0397, 407 0x0398, 0x0399, 0x039a, 0x039b, 0x039c, 0x039d, 0x039e, 0x039f, 408 0x03a0, 0x03a1, 0x03a2, 0x03a3, 0x03a4, 0x03a5, 0x03a6, 0x03a7, 409 0x03a8, 0x03a9, 0x0399, 0x03a5, 0x03b1, 0x03b5, 0x03b7, 0x03b9, 410 0x03cb, 0x03b1, 0x03b2, 0x03b3, 0x03b4, 0x03b5, 0x03b6, 0x03b7, 411 0x03b8, 0x03b9, 0x03ba, 0x03bb, 0x03bc, 0x03bd, 0x03be, 0x03bf, 412 0x03c0, 0x03c1, 0x03c2, 0x03c3, 0x03c4, 0x03c5, 0x03c6, 0x03c7, 413 0x03c8, 0x03c9, 0x03b9, 0x03c5, 0x03bf, 0x03c5, 0x03c9, 0x03cf, 414 0x03b2, 0x03b8, 0x03a5, 0x03d2, 0x03d2, 0x03c6, 0x03c0, 0x03d7, 415 0x03d8, 0x03d9, 0x03da, 0x03db, 0x03dc, 0x03dd, 0x03de, 0x03df, 416 0x03e0, 0x03e1, 0x03e2, 0x03e3, 0x03e4, 0x03e5, 0x03e6, 0x03e7, 417 0x03e8, 0x03e9, 0x03ea, 0x03eb, 0x03ec, 0x03ed, 0x03ee, 0x03ef, 418 0x03ba, 0x03c1, 0x03c2, 0x03f3, 0x0398, 0x03b5, 0x03f6, 0x03f7, 419 0x03f8, 0x03a3, 0x03fa, 0x03fb, 0x03fc, 0x03fd, 0x03fe, 0x03ff, 420 0x0415, 0x0415, 0x0402, 0x0413, 0x0404, 0x0405, 0x0406, 0x0406, 421 0x0408, 0x0409, 0x040a, 0x040b, 0x041a, 0x0418, 0x0423, 0x040f, 422 0x0410, 0x0411, 0x0412, 0x0413, 0x0414, 0x0415, 0x0416, 0x0417, 423 0x0418, 0x0418, 0x041a, 0x041b, 0x041c, 0x041d, 0x041e, 0x041f, 424 0x0420, 0x0421, 0x0422, 0x0423, 0x0424, 0x0425, 0x0426, 0x0427, 425 0x0428, 0x0429, 0x042a, 0x042b, 0x042c, 0x042d, 0x042e, 0x042f, 426 0x0430, 0x0431, 0x0432, 0x0433, 0x0434, 0x0435, 0x0436, 0x0437, 427 0x0438, 0x0438, 0x043a, 0x043b, 0x043c, 0x043d, 0x043e, 0x043f, 428 0x0440, 0x0441, 0x0442, 0x0443, 0x0444, 0x0445, 0x0446, 0x0447, 429 0x0448, 0x0449, 0x044a, 0x044b, 0x044c, 0x044d, 0x044e, 0x044f, 430 0x0435, 0x0435, 0x0452, 0x0433, 0x0454, 0x0455, 0x0456, 0x0456, 431 0x0458, 0x0459, 0x045a, 0x045b, 0x043a, 0x0438, 0x0443, 0x045f, 432 0x0460, 0x0461, 0x0462, 0x0463, 0x0464, 0x0465, 0x0466, 0x0467, 433 0x0468, 0x0469, 0x046a, 0x046b, 0x046c, 0x046d, 0x046e, 0x046f, 434 0x0470, 0x0471, 0x0472, 0x0473, 0x0474, 0x0475, 0x0474, 0x0475, 435 0x0478, 0x0479, 0x047a, 0x047b, 0x047c, 0x047d, 0x047e, 0x047f, 436 0x0480, 0x0481, 0x0482, 0x0483, 0x0484, 0x0485, 0x0486, 0x0487, 437 0x0488, 0x0489, 0x048a, 0x048b, 0x048c, 0x048d, 0x048e, 0x048f, 438 0x0490, 0x0491, 0x0492, 0x0493, 0x0494, 0x0495, 0x0496, 0x0497, 439 0x0498, 0x0499, 0x049a, 0x049b, 0x049c, 0x049d, 0x049e, 0x049f, 440 0x04a0, 0x04a1, 0x04a2, 0x04a3, 0x04a4, 0x04a5, 0x04a6, 0x04a7, 441 0x04a8, 0x04a9, 0x04aa, 0x04ab, 0x04ac, 0x04ad, 0x04ae, 0x04af, 442 0x04b0, 0x04b1, 0x04b2, 0x04b3, 0x04b4, 0x04b5, 0x04b6, 0x04b7, 443 0x04b8, 0x04b9, 0x04ba, 0x04bb, 0x04bc, 0x04bd, 0x04be, 0x04bf, 444 0x04c0, 0x0416, 0x0436, 0x04c3, 0x04c4, 0x04c5, 0x04c6, 0x04c7, 445 0x04c8, 0x04c9, 0x04ca, 0x04cb, 0x04cc, 0x04cd, 0x04ce, 0x04cf, 446 0x0410, 0x0430, 0x0410, 0x0430, 0x04d4, 0x04d5, 0x0415, 0x0435, 447 0x04d8, 0x04d9, 0x04d8, 0x04d9, 0x0416, 0x0436, 0x0417, 0x0437, 448 0x04e0, 0x04e1, 0x0418, 0x0438, 0x0418, 0x0438, 0x041e, 0x043e, 449 0x04e8, 0x04e9, 0x04e8, 0x04e9, 0x042d, 0x044d, 0x0423, 0x0443, 450 0x0423, 0x0443, 0x0423, 0x0443, 0x0427, 0x0447, 0x04f6, 0x04f7, 451 0x042b, 0x044b, 0x04fa, 0x04fb, 0x04fc, 0x04fd, 0x04fe, 0x04ff, 452 }; 453 454 // generated with: 455 // cat UnicodeData.txt | perl -e 'while (<>) { @foo = split(/;/); $foo[5] =~ s/<.*> //; $base[hex($foo[0])] = hex($foo[5]);} for ($i = 0; $i < 0x500; $i += 8) { for ($j = $i; $j < $i + 8; $j++) { printf("0x%04x, ", $base[$j] ? $base[$j] : $j)}; print "\n"; }' 456 457 } 458