1 /*
2 * Copyright (C) 2009 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #include <stdio.h>
18 #include <string.h>
19 #include <assert.h>
20 #include "../include/dictdef.h"
21
22 #ifdef ___BUILD_MODEL___
23 #include "../include/spellingtable.h"
24 #endif
25
26 #include "../include/spellingtrie.h"
27
28 namespace ime_pinyin {
29
30 SpellingTrie* SpellingTrie::instance_ = NULL;
31
32 // z/c/s is for Zh/Ch/Sh
33 const char SpellingTrie::kHalfId2Sc_[kFullSplIdStart + 1] =
34 "0ABCcDEFGHIJKLMNOPQRSsTUVWXYZz";
35
36 // Bit 0 : is it a Shengmu char?
37 // Bit 1 : is it a Yunmu char? (one char is a Yunmu)
38 // Bit 2 : is it enabled in ShouZiMu(first char) mode?
39 unsigned char SpellingTrie::char_flags_[] = {
40 // a b c d e f g
41 0x02, 0x01, 0x01, 0x01, 0x02, 0x01, 0x01,
42 // h i j k l m n
43 0x01, 0x00, 0x01, 0x01, 0x01, 0x01, 0x01,
44 // o p q r s t
45 0x02, 0x01, 0x01, 0x01, 0x01, 0x01,
46 // u v w x y z
47 0x00, 0x00, 0x01, 0x01, 0x01, 0x01
48 };
49
compare_spl(const void * p1,const void * p2)50 int compare_spl(const void* p1, const void* p2) {
51 return strcmp((const char*)(p1), (const char*)(p2));
52 }
53
SpellingTrie()54 SpellingTrie::SpellingTrie() {
55 spelling_buf_ = NULL;
56 spelling_size_ = 0;
57 spelling_num_ = 0;
58 spl_ym_ids_ = NULL;
59 splstr_queried_ = NULL;
60 splstr16_queried_ = NULL;
61 root_ = NULL;
62 dumb_node_ = NULL;
63 splitter_node_ = NULL;
64 instance_ = NULL;
65 ym_buf_ = NULL;
66 f2h_ = NULL;
67
68 szm_enable_shm(true);
69 szm_enable_ym(true);
70
71 #ifdef ___BUILD_MODEL___
72 node_num_ = 0;
73 #endif
74 }
75
~SpellingTrie()76 SpellingTrie::~SpellingTrie() {
77 if (NULL != spelling_buf_)
78 delete [] spelling_buf_;
79
80 if (NULL != splstr_queried_)
81 delete [] splstr_queried_;
82
83 if (NULL != splstr16_queried_)
84 delete [] splstr16_queried_;
85
86 if (NULL != spl_ym_ids_)
87 delete [] spl_ym_ids_;
88
89 if (NULL != root_) {
90 free_son_trie(root_);
91 delete root_;
92 }
93
94 if (NULL != dumb_node_) {
95 delete [] dumb_node_;
96 }
97
98 if (NULL != splitter_node_) {
99 delete [] splitter_node_;
100 }
101
102 if (NULL != instance_) {
103 delete instance_;
104 instance_ = NULL;
105 }
106
107 if (NULL != ym_buf_)
108 delete [] ym_buf_;
109
110 if (NULL != f2h_)
111 delete [] f2h_;
112 }
113
if_valid_id_update(uint16 * splid) const114 bool SpellingTrie::if_valid_id_update(uint16 *splid) const {
115 if (NULL == splid || 0 == *splid)
116 return false;
117
118 if (*splid >= kFullSplIdStart)
119 return true;
120 if (*splid < kFullSplIdStart) {
121 char ch = kHalfId2Sc_[*splid];
122 if (ch > 'Z') {
123 return true;
124 } else {
125 if (szm_is_enabled(ch)) {
126 return true;
127 } else if (is_yunmu_char(ch)) {
128 assert(h2f_num_[*splid] > 0);
129 *splid = h2f_start_[*splid];
130 return true;
131 }
132 }
133 }
134 return false;
135 }
136
is_half_id(uint16 splid) const137 bool SpellingTrie::is_half_id(uint16 splid) const {
138 if (0 == splid || splid >= kFullSplIdStart)
139 return false;
140
141 return true;
142 }
143
is_full_id(uint16 splid) const144 bool SpellingTrie::is_full_id(uint16 splid) const {
145 if (splid < kFullSplIdStart || splid >= kFullSplIdStart + spelling_num_)
146 return false;
147 return true;
148 }
149
half_full_compatible(uint16 half_id,uint16 full_id) const150 bool SpellingTrie::half_full_compatible(uint16 half_id, uint16 full_id) const {
151 uint16 half_fr_full = full_to_half(full_id);
152
153 if (half_fr_full == half_id)
154 return true;
155
156 // &~0x20 is used to conver the char to upper case.
157 // So that Zh/Ch/Sh(whose char is z/c/s) can be matched with Z/C/S.
158 char ch_f = (kHalfId2Sc_[half_fr_full] & (~0x20));
159 char ch_h = kHalfId2Sc_[half_id];
160 if (ch_f == ch_h)
161 return true;
162
163 return false;
164 }
165
is_half_id_yunmu(uint16 splid) const166 bool SpellingTrie::is_half_id_yunmu(uint16 splid) const {
167 if (0 == splid || splid >= kFullSplIdStart)
168 return false;
169
170 char ch = kHalfId2Sc_[splid];
171 // If ch >= 'a', that means the half id is one of Zh/Ch/Sh
172 if (ch >= 'a') {
173 return false;
174 }
175
176 return char_flags_[ch - 'A'] & kHalfIdYunmuMask;
177 }
178
is_shengmu_char(char ch) const179 bool SpellingTrie::is_shengmu_char(char ch) const {
180 return char_flags_[ch - 'A'] & kHalfIdShengmuMask;
181 }
182
is_yunmu_char(char ch) const183 bool SpellingTrie::is_yunmu_char(char ch) const {
184 return char_flags_[ch - 'A'] & kHalfIdYunmuMask;
185 }
186
is_szm_char(char ch) const187 bool SpellingTrie::is_szm_char(char ch) const {
188 return is_shengmu_char(ch) || is_yunmu_char(ch);
189 }
190
szm_is_enabled(char ch) const191 bool SpellingTrie::szm_is_enabled(char ch) const {
192 return char_flags_[ch - 'A'] & kHalfIdSzmMask;
193 }
194
szm_enable_shm(bool enable)195 void SpellingTrie::szm_enable_shm(bool enable) {
196 if (enable) {
197 for (char ch = 'A'; ch <= 'Z'; ch++) {
198 if (is_shengmu_char(ch))
199 char_flags_[ch - 'A'] = char_flags_[ch - 'A'] | kHalfIdSzmMask;
200 }
201 } else {
202 for (char ch = 'A'; ch <= 'Z'; ch++) {
203 if (is_shengmu_char(ch))
204 char_flags_[ch - 'A'] = char_flags_[ch - 'A'] & (kHalfIdSzmMask ^ 0xff);
205 }
206 }
207 }
208
szm_enable_ym(bool enable)209 void SpellingTrie::szm_enable_ym(bool enable) {
210 if (enable) {
211 for (char ch = 'A'; ch <= 'Z'; ch++) {
212 if (is_yunmu_char(ch))
213 char_flags_[ch - 'A'] = char_flags_[ch - 'A'] | kHalfIdSzmMask;
214 }
215 } else {
216 for (char ch = 'A'; ch <= 'Z'; ch++) {
217 if (is_yunmu_char(ch))
218 char_flags_[ch - 'A'] = char_flags_[ch - 'A'] & (kHalfIdSzmMask ^ 0xff);
219 }
220 }
221 }
222
is_szm_enabled(char ch) const223 bool SpellingTrie::is_szm_enabled(char ch) const {
224 return char_flags_[ch - 'A'] & kHalfIdSzmMask;
225 }
226
get_cpinstance()227 const SpellingTrie* SpellingTrie::get_cpinstance() {
228 return &get_instance();
229 }
230
get_instance()231 SpellingTrie& SpellingTrie::get_instance() {
232 if (NULL == instance_)
233 instance_ = new SpellingTrie();
234
235 return *instance_;
236 }
237
half2full_num(uint16 half_id) const238 uint16 SpellingTrie::half2full_num(uint16 half_id) const {
239 if (NULL == root_ || half_id >= kFullSplIdStart)
240 return 0;
241 return h2f_num_[half_id];
242 }
243
half_to_full(uint16 half_id,uint16 * spl_id_start) const244 uint16 SpellingTrie::half_to_full(uint16 half_id, uint16 *spl_id_start) const {
245 if (NULL == spl_id_start || NULL == root_ || half_id >= kFullSplIdStart)
246 return 0;
247
248 *spl_id_start = h2f_start_[half_id];
249 return h2f_num_[half_id];
250 }
251
full_to_half(uint16 full_id) const252 uint16 SpellingTrie::full_to_half(uint16 full_id) const {
253 if (NULL == root_ || full_id < kFullSplIdStart ||
254 full_id > spelling_num_ + kFullSplIdStart)
255 return 0;
256
257 return f2h_[full_id - kFullSplIdStart];
258 }
259
free_son_trie(SpellingNode * node)260 void SpellingTrie::free_son_trie(SpellingNode* node) {
261 if (NULL == node)
262 return;
263
264 for (size_t pos = 0; pos < node->num_of_son; pos++) {
265 free_son_trie(node->first_son + pos);
266 }
267
268 if (NULL != node->first_son)
269 delete [] node->first_son;
270 }
271
construct(const char * spelling_arr,size_t item_size,size_t item_num,float score_amplifier,unsigned char average_score)272 bool SpellingTrie::construct(const char* spelling_arr, size_t item_size,
273 size_t item_num, float score_amplifier,
274 unsigned char average_score) {
275 if (spelling_arr == NULL)
276 return false;
277
278 memset(h2f_start_, 0, sizeof(uint16) * kFullSplIdStart);
279 memset(h2f_num_, 0, sizeof(uint16) * kFullSplIdStart);
280
281 // If the arr is the same as the buf, means this function is called by
282 // load_table(), the table data are ready; otherwise the array should be
283 // saved.
284 if (spelling_arr != spelling_buf_) {
285 if (NULL != spelling_buf_)
286 delete [] spelling_buf_;
287 spelling_buf_ = new char[item_size * item_num];
288 if (NULL == spelling_buf_)
289 return false;
290 memcpy(spelling_buf_, spelling_arr, sizeof(char) * item_size * item_num);
291 }
292
293 spelling_size_ = item_size;
294 spelling_num_ = item_num;
295
296 score_amplifier_ = score_amplifier;
297 average_score_ = average_score;
298
299 if (NULL != splstr_queried_)
300 delete [] splstr_queried_;
301 splstr_queried_ = new char[spelling_size_];
302 if (NULL == splstr_queried_)
303 return false;
304
305 if (NULL != splstr16_queried_)
306 delete [] splstr16_queried_;
307 splstr16_queried_ = new char16[spelling_size_];
308 if (NULL == splstr16_queried_)
309 return false;
310
311 // First, sort the buf to ensure they are in ascendant order
312 qsort(spelling_buf_, spelling_num_, spelling_size_, compare_spl);
313
314 #ifdef ___BUILD_MODEL___
315 node_num_ = 1;
316 #endif
317
318 root_ = new SpellingNode();
319 memset(root_, 0, sizeof(SpellingNode));
320
321 dumb_node_ = new SpellingNode();
322 memset(dumb_node_, 0, sizeof(SpellingNode));
323 dumb_node_->score = average_score_;
324
325 splitter_node_ = new SpellingNode();
326 memset(splitter_node_, 0, sizeof(SpellingNode));
327 splitter_node_->score = average_score_;
328
329 memset(level1_sons_, 0, sizeof(SpellingNode*) * kValidSplCharNum);
330
331 root_->first_son = construct_spellings_subset(0, spelling_num_, 0, root_);
332
333 // Root's score should be cleared.
334 root_->score = 0;
335
336 if (NULL == root_->first_son)
337 return false;
338
339 h2f_start_[0] = h2f_num_[0] = 0;
340
341 if (!build_f2h())
342 return false;
343
344 #ifdef ___BUILD_MODEL___
345 if (kPrintDebug0) {
346 printf("---SpellingTrie Nodes: %d\n", node_num_);
347 }
348 return build_ym_info();
349 #else
350 return true;
351 #endif
352 }
353
354 #ifdef ___BUILD_MODEL___
get_ym_str(const char * spl_str)355 const char* SpellingTrie::get_ym_str(const char *spl_str) {
356 bool start_ZCS = false;
357 if (is_shengmu_char(*spl_str)) {
358 if ('Z' == *spl_str || 'C' == *spl_str || 'S' == *spl_str)
359 start_ZCS = true;
360 spl_str += 1;
361 if (start_ZCS && 'h' == *spl_str)
362 spl_str += 1;
363 }
364 return spl_str;
365 }
366
build_ym_info()367 bool SpellingTrie::build_ym_info() {
368 bool sucess;
369 SpellingTable *spl_table = new SpellingTable();
370
371 sucess = spl_table->init_table(kMaxPinyinSize - 1, 2 * kMaxYmNum, false);
372 assert(sucess);
373
374 for (uint16 pos = 0; pos < spelling_num_; pos++) {
375 const char *spl_str = spelling_buf_ + spelling_size_ * pos;
376 spl_str = get_ym_str(spl_str);
377 if ('\0' != spl_str[0]) {
378 sucess = spl_table->put_spelling(spl_str, 0);
379 assert(sucess);
380 }
381 }
382
383 size_t ym_item_size; // '\0' is included
384 size_t ym_num;
385 const char* ym_buf;
386 ym_buf = spl_table->arrange(&ym_item_size, &ym_num);
387
388 if (NULL != ym_buf_)
389 delete [] ym_buf_;
390 ym_buf_ = new char[ym_item_size * ym_num];
391 if (NULL == ym_buf_) {
392 delete spl_table;
393 return false;
394 }
395
396 memcpy(ym_buf_, ym_buf, sizeof(char) * ym_item_size * ym_num);
397 ym_size_ = ym_item_size;
398 ym_num_ = ym_num;
399
400 delete spl_table;
401
402 // Generate the maping from the spelling ids to the Yunmu ids.
403 if (spl_ym_ids_)
404 delete spl_ym_ids_;
405 spl_ym_ids_ = new uint8[spelling_num_ + kFullSplIdStart];
406 if (NULL == spl_ym_ids_)
407 return false;
408
409 memset(spl_ym_ids_, 0, sizeof(uint8) * (spelling_num_ + kFullSplIdStart));
410
411 for (uint16 id = 1; id < spelling_num_ + kFullSplIdStart; id++) {
412 const char *str = get_spelling_str(id);
413
414 str = get_ym_str(str);
415 if ('\0' != str[0]) {
416 uint8 ym_id = get_ym_id(str);
417 spl_ym_ids_[id] = ym_id;
418 assert(ym_id > 0);
419 } else {
420 spl_ym_ids_[id] = 0;
421 }
422 }
423 return true;
424 }
425 #endif
426
construct_spellings_subset(size_t item_start,size_t item_end,size_t level,SpellingNode * parent)427 SpellingNode* SpellingTrie::construct_spellings_subset(
428 size_t item_start, size_t item_end, size_t level, SpellingNode* parent) {
429 if (level >= spelling_size_ || item_end <= item_start || NULL == parent)
430 return NULL;
431
432 SpellingNode *first_son = NULL;
433 uint16 num_of_son = 0;
434 unsigned char min_son_score = 255;
435
436 const char *spelling_last_start = spelling_buf_ + spelling_size_ * item_start;
437 char char_for_node = spelling_last_start[level];
438 assert(char_for_node >= 'A' && char_for_node <= 'Z' ||
439 'h' == char_for_node);
440
441 // Scan the array to find how many sons
442 for (size_t i = item_start + 1; i < item_end; i++) {
443 const char *spelling_current = spelling_buf_ + spelling_size_ * i;
444 char char_current = spelling_current[level];
445 if (char_current != char_for_node) {
446 num_of_son++;
447 char_for_node = char_current;
448 }
449 }
450 num_of_son++;
451
452 // Allocate memory
453 #ifdef ___BUILD_MODEL___
454 node_num_ += num_of_son;
455 #endif
456 first_son = new SpellingNode[num_of_son];
457 memset(first_son, 0, sizeof(SpellingNode)*num_of_son);
458
459 // Now begin construct tree
460 size_t son_pos = 0;
461
462 spelling_last_start = spelling_buf_ + spelling_size_ * item_start;
463 char_for_node = spelling_last_start[level];
464
465 bool spelling_endable = true;
466 if (spelling_last_start[level + 1] != '\0')
467 spelling_endable = false;
468
469 size_t item_start_next = item_start;
470
471 for (size_t i = item_start + 1; i < item_end; i++) {
472 const char *spelling_current = spelling_buf_ + spelling_size_ * i;
473 char char_current = spelling_current[level];
474 assert(is_valid_spl_char(char_current));
475
476 if (char_current != char_for_node) {
477 // Construct a node
478 SpellingNode *node_current = first_son + son_pos;
479 node_current->char_this_node = char_for_node;
480
481 // For quick search in the first level
482 if (0 == level)
483 level1_sons_[char_for_node - 'A'] = node_current;
484
485 if (spelling_endable) {
486 node_current->spelling_idx = kFullSplIdStart + item_start_next;
487 }
488
489 if (spelling_last_start[level + 1] != '\0' || i - item_start_next > 1) {
490 size_t real_start = item_start_next;
491 if (spelling_last_start[level + 1] == '\0')
492 real_start++;
493
494 node_current->first_son =
495 construct_spellings_subset(real_start, i, level + 1,
496 node_current);
497
498 if (real_start == item_start_next + 1) {
499 uint16 score_this = static_cast<unsigned char>(
500 spelling_last_start[spelling_size_ - 1]);
501 if (score_this < node_current->score)
502 node_current->score = score_this;
503 }
504 } else {
505 node_current->first_son = NULL;
506 node_current->score = static_cast<unsigned char>(
507 spelling_last_start[spelling_size_ - 1]);
508 }
509
510 if (node_current->score < min_son_score)
511 min_son_score = node_current->score;
512
513 bool is_half = false;
514 if (level == 0 && is_szm_char(char_for_node)) {
515 node_current->spelling_idx =
516 static_cast<uint16>(char_for_node - 'A' + 1);
517
518 if (char_for_node > 'C')
519 node_current->spelling_idx++;
520 if (char_for_node > 'S')
521 node_current->spelling_idx++;
522
523 h2f_num_[node_current->spelling_idx] = i - item_start_next;
524 is_half = true;
525 } else if (level == 1 && char_for_node == 'h') {
526 char ch_level0 = spelling_last_start[0];
527 uint16 part_id = 0;
528 if (ch_level0 == 'C')
529 part_id = 'C' - 'A' + 1 + 1;
530 else if (ch_level0 == 'S')
531 part_id = 'S' - 'A' + 1 + 2;
532 else if (ch_level0 == 'Z')
533 part_id = 'Z' - 'A' + 1 + 3;
534 if (0 != part_id) {
535 node_current->spelling_idx = part_id;
536 h2f_num_[node_current->spelling_idx] = i - item_start_next;
537 is_half = true;
538 }
539 }
540
541 if (is_half) {
542 if (h2f_num_[node_current->spelling_idx] > 0)
543 h2f_start_[node_current->spelling_idx] =
544 item_start_next + kFullSplIdStart;
545 else
546 h2f_start_[node_current->spelling_idx] = 0;
547 }
548
549 // for next sibling
550 spelling_last_start = spelling_current;
551 char_for_node = char_current;
552 item_start_next = i;
553 spelling_endable = true;
554 if (spelling_current[level + 1] != '\0')
555 spelling_endable = false;
556
557 son_pos++;
558 }
559 }
560
561 // the last one
562 SpellingNode *node_current = first_son + son_pos;
563 node_current->char_this_node = char_for_node;
564
565 // For quick search in the first level
566 if (0 == level)
567 level1_sons_[char_for_node - 'A'] = node_current;
568
569 if (spelling_endable) {
570 node_current->spelling_idx = kFullSplIdStart + item_start_next;
571 }
572
573 if (spelling_last_start[level + 1] != '\0' ||
574 item_end - item_start_next > 1) {
575 size_t real_start = item_start_next;
576 if (spelling_last_start[level + 1] == '\0')
577 real_start++;
578
579 node_current->first_son =
580 construct_spellings_subset(real_start, item_end, level + 1,
581 node_current);
582
583 if (real_start == item_start_next + 1) {
584 uint16 score_this = static_cast<unsigned char>(
585 spelling_last_start[spelling_size_ - 1]);
586 if (score_this < node_current->score)
587 node_current->score = score_this;
588 }
589 } else {
590 node_current->first_son = NULL;
591 node_current->score = static_cast<unsigned char>(
592 spelling_last_start[spelling_size_ - 1]);
593 }
594
595 if (node_current->score < min_son_score)
596 min_son_score = node_current->score;
597
598 assert(son_pos + 1 == num_of_son);
599
600 bool is_half = false;
601 if (level == 0 && szm_is_enabled(char_for_node)) {
602 node_current->spelling_idx = static_cast<uint16>(char_for_node - 'A' + 1);
603
604 if (char_for_node > 'C')
605 node_current->spelling_idx++;
606 if (char_for_node > 'S')
607 node_current->spelling_idx++;
608
609 h2f_num_[node_current->spelling_idx] = item_end - item_start_next;
610 is_half = true;
611 } else if (level == 1 && char_for_node == 'h') {
612 char ch_level0 = spelling_last_start[0];
613 uint16 part_id = 0;
614 if (ch_level0 == 'C')
615 part_id = 'C' - 'A' + 1 + 1;
616 else if (ch_level0 == 'S')
617 part_id = 'S' - 'A' + 1 + 2;
618 else if (ch_level0 == 'Z')
619 part_id = 'Z' - 'A' + 1 + 3;
620 if (0 != part_id) {
621 node_current->spelling_idx = part_id;
622 h2f_num_[node_current->spelling_idx] = item_end - item_start_next;
623 is_half = true;
624 }
625 }
626 if (is_half) {
627 if (h2f_num_[node_current->spelling_idx] > 0)
628 h2f_start_[node_current->spelling_idx] =
629 item_start_next + kFullSplIdStart;
630 else
631 h2f_start_[node_current->spelling_idx] = 0;
632 }
633
634 parent->num_of_son = num_of_son;
635 parent->score = min_son_score;
636 return first_son;
637 }
638
save_spl_trie(FILE * fp)639 bool SpellingTrie::save_spl_trie(FILE *fp) {
640 if (NULL == fp || NULL == spelling_buf_)
641 return false;
642
643 if (fwrite(&spelling_size_, sizeof(size_t), 1, fp) != 1)
644 return false;
645
646 if (fwrite(&spelling_num_, sizeof(size_t), 1, fp) != 1)
647 return false;
648
649 if (fwrite(&score_amplifier_, sizeof(float), 1, fp) != 1)
650 return false;
651
652 if (fwrite(&average_score_, sizeof(unsigned char), 1, fp) != 1)
653 return false;
654
655 if (fwrite(spelling_buf_, sizeof(char) * spelling_size_,
656 spelling_num_, fp) != spelling_num_)
657 return false;
658
659 return true;
660 }
661
load_spl_trie(FILE * fp)662 bool SpellingTrie::load_spl_trie(FILE *fp) {
663 if (NULL == fp)
664 return false;
665
666 if (fread(&spelling_size_, sizeof(size_t), 1, fp) != 1)
667 return false;
668
669 if (fread(&spelling_num_, sizeof(size_t), 1, fp) != 1)
670 return false;
671
672 if (fread(&score_amplifier_, sizeof(float), 1, fp) != 1)
673 return false;
674
675 if (fread(&average_score_, sizeof(unsigned char), 1, fp) != 1)
676 return false;
677
678 if (NULL != spelling_buf_)
679 delete [] spelling_buf_;
680
681 spelling_buf_ = new char[spelling_size_ * spelling_num_];
682 if (NULL == spelling_buf_)
683 return false;
684
685 if (fread(spelling_buf_, sizeof(char) * spelling_size_,
686 spelling_num_, fp) != spelling_num_)
687 return false;
688
689 return construct(spelling_buf_, spelling_size_, spelling_num_,
690 score_amplifier_, average_score_);
691 }
692
build_f2h()693 bool SpellingTrie::build_f2h() {
694 if (NULL != f2h_)
695 delete [] f2h_;
696 f2h_ = new uint16[spelling_num_];
697 if (NULL == f2h_)
698 return false;
699
700 for (uint16 hid = 0; hid < kFullSplIdStart; hid++) {
701 for (uint16 fid = h2f_start_[hid];
702 fid < h2f_start_[hid] + h2f_num_[hid]; fid++)
703 f2h_[fid - kFullSplIdStart] = hid;
704 }
705
706 return true;
707 }
708
get_spelling_num()709 size_t SpellingTrie::get_spelling_num() {
710 return spelling_num_;
711 }
712
get_ym_id(const char * ym_str)713 uint8 SpellingTrie::get_ym_id(const char *ym_str) {
714 if (NULL == ym_str || NULL == ym_buf_)
715 return 0;
716
717 for (uint8 pos = 0; pos < ym_num_; pos++)
718 if (strcmp(ym_buf_ + ym_size_ * pos, ym_str) == 0)
719 return pos + 1;
720
721 return 0;
722 }
723
get_spelling_str(uint16 splid)724 const char* SpellingTrie::get_spelling_str(uint16 splid) {
725 splstr_queried_[0] = '\0';
726
727 if (splid >= kFullSplIdStart) {
728 splid -= kFullSplIdStart;
729 snprintf(splstr_queried_, spelling_size_, "%s",
730 spelling_buf_ + splid * spelling_size_);
731 } else {
732 if (splid == 'C' - 'A' + 1 + 1) {
733 snprintf(splstr_queried_, spelling_size_, "%s", "Ch");
734 } else if (splid == 'S' - 'A' + 1 + 2) {
735 snprintf(splstr_queried_, spelling_size_, "%s", "Sh");
736 } else if (splid == 'Z' - 'A' + 1 + 3) {
737 snprintf(splstr_queried_, spelling_size_, "%s", "Zh");
738 } else {
739 if (splid > 'C' - 'A' + 1)
740 splid--;
741 if (splid > 'S' - 'A' + 1)
742 splid--;
743 splstr_queried_[0] = 'A' + splid - 1;
744 splstr_queried_[1] = '\0';
745 }
746 }
747 return splstr_queried_;
748 }
749
get_spelling_str16(uint16 splid)750 const char16* SpellingTrie::get_spelling_str16(uint16 splid) {
751 splstr16_queried_[0] = '\0';
752
753 if (splid >= kFullSplIdStart) {
754 splid -= kFullSplIdStart;
755 for (size_t pos = 0; pos < spelling_size_; pos++) {
756 splstr16_queried_[pos] = static_cast<char16>
757 (spelling_buf_[splid * spelling_size_ + pos]);
758 }
759 } else {
760 if (splid == 'C' - 'A' + 1 + 1) {
761 splstr16_queried_[0] = static_cast<char16>('C');
762 splstr16_queried_[1] = static_cast<char16>('h');
763 splstr16_queried_[2] = static_cast<char16>('\0');
764 } else if (splid == 'S' - 'A' + 1 + 2) {
765 splstr16_queried_[0] = static_cast<char16>('S');
766 splstr16_queried_[1] = static_cast<char16>('h');
767 splstr16_queried_[2] = static_cast<char16>('\0');
768 } else if (splid == 'Z' - 'A' + 1 + 3) {
769 splstr16_queried_[0] = static_cast<char16>('Z');
770 splstr16_queried_[1] = static_cast<char16>('h');
771 splstr16_queried_[2] = static_cast<char16>('\0');
772 } else {
773 if (splid > 'C' - 'A' + 1)
774 splid--;
775 if (splid > 'S' - 'A' + 1)
776 splid--;
777 splstr16_queried_[0] = 'A' + splid - 1;
778 splstr16_queried_[1] = '\0';
779 }
780 }
781 return splstr16_queried_;
782 }
783
get_spelling_str16(uint16 splid,char16 * splstr16,size_t splstr16_len)784 size_t SpellingTrie::get_spelling_str16(uint16 splid, char16 *splstr16,
785 size_t splstr16_len) {
786 if (NULL == splstr16 || splstr16_len < kMaxPinyinSize + 1) return 0;
787
788 if (splid >= kFullSplIdStart) {
789 splid -= kFullSplIdStart;
790 for (size_t pos = 0; pos <= kMaxPinyinSize; pos++) {
791 splstr16[pos] = static_cast<char16>
792 (spelling_buf_[splid * spelling_size_ + pos]);
793 if (static_cast<char16>('\0') == splstr16[pos]) {
794 return pos;
795 }
796 }
797 } else {
798 if (splid == 'C' - 'A' + 1 + 1) {
799 splstr16[0] = static_cast<char16>('C');
800 splstr16[1] = static_cast<char16>('h');
801 splstr16[2] = static_cast<char16>('\0');
802 return 2;
803 } else if (splid == 'S' - 'A' + 1 + 2) {
804 splstr16[0] = static_cast<char16>('S');
805 splstr16[1] = static_cast<char16>('h');
806 splstr16[2] = static_cast<char16>('\0');
807 return 2;
808 } else if (splid == 'Z' - 'A' + 1 + 3) {
809 splstr16[0] = static_cast<char16>('Z');
810 splstr16[1] = static_cast<char16>('h');
811 splstr16[2] = static_cast<char16>('\0');
812 return 2;
813 } else {
814 if (splid > 'C' - 'A' + 1)
815 splid--;
816 if (splid > 'S' - 'A' + 1)
817 splid--;
818 splstr16[0] = 'A' + splid - 1;
819 splstr16[1] = '\0';
820 return 1;
821 }
822 }
823
824 // Not reachable.
825 return 0;
826 }
827
828 } // namespace ime_pinyin
829