• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (C) 2009 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include <stdio.h>
18 #include <string.h>
19 #include <assert.h>
20 #include "../include/dictdef.h"
21 
22 #ifdef ___BUILD_MODEL___
23 #include "../include/spellingtable.h"
24 #endif
25 
26 #include "../include/spellingtrie.h"
27 
28 namespace ime_pinyin {
29 
30 SpellingTrie* SpellingTrie::instance_ = NULL;
31 
32 // z/c/s is for Zh/Ch/Sh
33 const char SpellingTrie::kHalfId2Sc_[kFullSplIdStart + 1] =
34     "0ABCcDEFGHIJKLMNOPQRSsTUVWXYZz";
35 
36 // Bit 0 : is it a Shengmu char?
37 // Bit 1 : is it a Yunmu char? (one char is a Yunmu)
38 // Bit 2 : is it enabled in ShouZiMu(first char) mode?
39 unsigned char SpellingTrie::char_flags_[] = {
40   // a    b      c     d     e     f     g
41   0x02, 0x01, 0x01, 0x01, 0x02, 0x01, 0x01,
42   // h    i     j      k     l     m    n
43   0x01, 0x00, 0x01, 0x01, 0x01, 0x01, 0x01,
44   // o    p     q      r     s     t
45   0x02, 0x01, 0x01, 0x01, 0x01, 0x01,
46   // u    v     w      x     y     z
47   0x00, 0x00, 0x01, 0x01, 0x01, 0x01
48 };
49 
compare_spl(const void * p1,const void * p2)50 int compare_spl(const void* p1, const void* p2) {
51   return strcmp((const char*)(p1), (const char*)(p2));
52 }
53 
SpellingTrie()54 SpellingTrie::SpellingTrie() {
55   spelling_buf_ = NULL;
56   spelling_size_ = 0;
57   spelling_num_ = 0;
58   spl_ym_ids_ = NULL;
59   splstr_queried_ = NULL;
60   splstr16_queried_ = NULL;
61   root_ = NULL;
62   dumb_node_ = NULL;
63   splitter_node_ = NULL;
64   instance_ = NULL;
65   ym_buf_ = NULL;
66   f2h_ = NULL;
67 
68   szm_enable_shm(true);
69   szm_enable_ym(true);
70 
71 #ifdef ___BUILD_MODEL___
72   node_num_ = 0;
73 #endif
74 }
75 
~SpellingTrie()76 SpellingTrie::~SpellingTrie() {
77   if (NULL != spelling_buf_)
78     delete [] spelling_buf_;
79 
80   if (NULL != splstr_queried_)
81     delete [] splstr_queried_;
82 
83   if (NULL != splstr16_queried_)
84     delete [] splstr16_queried_;
85 
86   if (NULL != spl_ym_ids_)
87     delete [] spl_ym_ids_;
88 
89   if (NULL != root_) {
90     free_son_trie(root_);
91     delete root_;
92   }
93 
94   if (NULL != dumb_node_) {
95     delete [] dumb_node_;
96   }
97 
98   if (NULL != splitter_node_) {
99     delete [] splitter_node_;
100   }
101 
102   if (NULL != instance_) {
103     delete instance_;
104     instance_ = NULL;
105   }
106 
107   if (NULL != ym_buf_)
108     delete [] ym_buf_;
109 
110   if (NULL != f2h_)
111     delete [] f2h_;
112 }
113 
if_valid_id_update(uint16 * splid) const114 bool SpellingTrie::if_valid_id_update(uint16 *splid) const {
115   if (NULL == splid || 0 == *splid)
116     return false;
117 
118   if (*splid >= kFullSplIdStart)
119     return true;
120   if (*splid < kFullSplIdStart) {
121     char ch = kHalfId2Sc_[*splid];
122     if (ch > 'Z') {
123       return true;
124     } else {
125       if (szm_is_enabled(ch)) {
126         return true;
127       } else if (is_yunmu_char(ch)) {
128         assert(h2f_num_[*splid] > 0);
129         *splid = h2f_start_[*splid];
130         return true;
131       }
132     }
133   }
134   return false;
135 }
136 
is_half_id(uint16 splid) const137 bool SpellingTrie::is_half_id(uint16 splid) const {
138   if (0 == splid || splid >= kFullSplIdStart)
139     return false;
140 
141   return true;
142 }
143 
is_full_id(uint16 splid) const144 bool SpellingTrie::is_full_id(uint16 splid) const {
145   if (splid < kFullSplIdStart || splid >= kFullSplIdStart + spelling_num_)
146     return false;
147   return true;
148 }
149 
half_full_compatible(uint16 half_id,uint16 full_id) const150 bool SpellingTrie::half_full_compatible(uint16 half_id, uint16 full_id) const {
151   uint16 half_fr_full = full_to_half(full_id);
152 
153   if (half_fr_full == half_id)
154     return true;
155 
156   // &~0x20 is used to conver the char to upper case.
157   // So that Zh/Ch/Sh(whose char is z/c/s) can be matched with Z/C/S.
158   char ch_f = (kHalfId2Sc_[half_fr_full] & (~0x20));
159   char ch_h = kHalfId2Sc_[half_id];
160   if (ch_f == ch_h)
161     return true;
162 
163   return false;
164 }
165 
is_half_id_yunmu(uint16 splid) const166 bool SpellingTrie::is_half_id_yunmu(uint16 splid) const {
167   if (0 == splid || splid >= kFullSplIdStart)
168     return false;
169 
170   char ch = kHalfId2Sc_[splid];
171   // If ch >= 'a', that means the half id is one of Zh/Ch/Sh
172   if (ch >= 'a') {
173     return false;
174   }
175 
176   return char_flags_[ch - 'A'] & kHalfIdYunmuMask;
177 }
178 
is_shengmu_char(char ch) const179 bool SpellingTrie::is_shengmu_char(char ch) const {
180   return char_flags_[ch - 'A'] & kHalfIdShengmuMask;
181 }
182 
is_yunmu_char(char ch) const183 bool SpellingTrie::is_yunmu_char(char ch) const {
184   return char_flags_[ch - 'A'] & kHalfIdYunmuMask;
185 }
186 
is_szm_char(char ch) const187 bool SpellingTrie::is_szm_char(char ch) const {
188   return is_shengmu_char(ch) || is_yunmu_char(ch);
189 }
190 
szm_is_enabled(char ch) const191 bool SpellingTrie::szm_is_enabled(char ch) const {
192   return char_flags_[ch - 'A'] & kHalfIdSzmMask;
193 }
194 
szm_enable_shm(bool enable)195 void SpellingTrie::szm_enable_shm(bool enable) {
196   if (enable) {
197     for (char ch = 'A'; ch <= 'Z'; ch++) {
198       if (is_shengmu_char(ch))
199         char_flags_[ch - 'A'] = char_flags_[ch - 'A'] | kHalfIdSzmMask;
200     }
201   } else {
202     for (char ch = 'A'; ch <= 'Z'; ch++) {
203       if (is_shengmu_char(ch))
204         char_flags_[ch - 'A'] = char_flags_[ch - 'A'] & (kHalfIdSzmMask ^ 0xff);
205     }
206   }
207 }
208 
szm_enable_ym(bool enable)209 void SpellingTrie::szm_enable_ym(bool enable) {
210   if (enable) {
211     for (char ch = 'A'; ch <= 'Z'; ch++) {
212       if (is_yunmu_char(ch))
213         char_flags_[ch - 'A'] = char_flags_[ch - 'A'] | kHalfIdSzmMask;
214     }
215   } else {
216     for (char ch = 'A'; ch <= 'Z'; ch++) {
217       if (is_yunmu_char(ch))
218         char_flags_[ch - 'A'] = char_flags_[ch - 'A'] & (kHalfIdSzmMask ^ 0xff);
219     }
220   }
221 }
222 
is_szm_enabled(char ch) const223 bool SpellingTrie::is_szm_enabled(char ch) const {
224   return char_flags_[ch - 'A'] & kHalfIdSzmMask;
225 }
226 
get_cpinstance()227 const SpellingTrie* SpellingTrie::get_cpinstance() {
228   return &get_instance();
229 }
230 
get_instance()231 SpellingTrie& SpellingTrie::get_instance() {
232   if (NULL == instance_)
233     instance_ = new SpellingTrie();
234 
235   return *instance_;
236 }
237 
half2full_num(uint16 half_id) const238 uint16 SpellingTrie::half2full_num(uint16 half_id) const {
239   if (NULL == root_ || half_id >= kFullSplIdStart)
240     return 0;
241   return h2f_num_[half_id];
242 }
243 
half_to_full(uint16 half_id,uint16 * spl_id_start) const244 uint16 SpellingTrie::half_to_full(uint16 half_id, uint16 *spl_id_start) const {
245   if (NULL == spl_id_start || NULL == root_ || half_id >= kFullSplIdStart)
246     return 0;
247 
248   *spl_id_start = h2f_start_[half_id];
249   return h2f_num_[half_id];
250 }
251 
full_to_half(uint16 full_id) const252 uint16 SpellingTrie::full_to_half(uint16 full_id) const {
253   if (NULL == root_ || full_id < kFullSplIdStart ||
254       full_id > spelling_num_ + kFullSplIdStart)
255     return 0;
256 
257   return f2h_[full_id - kFullSplIdStart];
258 }
259 
free_son_trie(SpellingNode * node)260 void SpellingTrie::free_son_trie(SpellingNode* node) {
261   if (NULL == node)
262     return;
263 
264   for (size_t pos = 0; pos < node->num_of_son; pos++) {
265     free_son_trie(node->first_son + pos);
266   }
267 
268   if (NULL != node->first_son)
269     delete [] node->first_son;
270 }
271 
construct(const char * spelling_arr,size_t item_size,size_t item_num,float score_amplifier,unsigned char average_score)272 bool SpellingTrie::construct(const char* spelling_arr, size_t item_size,
273                              size_t item_num, float score_amplifier,
274                              unsigned char average_score) {
275   if (spelling_arr == NULL)
276     return false;
277 
278   memset(h2f_start_, 0, sizeof(uint16) * kFullSplIdStart);
279   memset(h2f_num_, 0, sizeof(uint16) * kFullSplIdStart);
280 
281   // If the arr is the same as the buf, means this function is called by
282   // load_table(), the table data are ready; otherwise the array should be
283   // saved.
284   if (spelling_arr != spelling_buf_) {
285     if (NULL != spelling_buf_)
286       delete [] spelling_buf_;
287     spelling_buf_ = new char[item_size * item_num];
288     if (NULL == spelling_buf_)
289       return false;
290     memcpy(spelling_buf_, spelling_arr, sizeof(char) * item_size * item_num);
291   }
292 
293   spelling_size_ = item_size;
294   spelling_num_ = item_num;
295 
296   score_amplifier_ = score_amplifier;
297   average_score_ = average_score;
298 
299   if (NULL != splstr_queried_)
300     delete [] splstr_queried_;
301   splstr_queried_ = new char[spelling_size_];
302   if (NULL == splstr_queried_)
303     return false;
304 
305   if (NULL != splstr16_queried_)
306     delete [] splstr16_queried_;
307   splstr16_queried_ = new char16[spelling_size_];
308   if (NULL == splstr16_queried_)
309     return false;
310 
311   // First, sort the buf to ensure they are in ascendant order
312   qsort(spelling_buf_, spelling_num_, spelling_size_, compare_spl);
313 
314 #ifdef ___BUILD_MODEL___
315   node_num_ = 1;
316 #endif
317 
318   root_ = new SpellingNode();
319   memset(root_, 0, sizeof(SpellingNode));
320 
321   dumb_node_ = new SpellingNode();
322   memset(dumb_node_, 0, sizeof(SpellingNode));
323   dumb_node_->score = average_score_;
324 
325   splitter_node_ = new SpellingNode();
326   memset(splitter_node_, 0, sizeof(SpellingNode));
327   splitter_node_->score = average_score_;
328 
329   memset(level1_sons_, 0, sizeof(SpellingNode*) * kValidSplCharNum);
330 
331   root_->first_son = construct_spellings_subset(0, spelling_num_, 0, root_);
332 
333   // Root's score should be cleared.
334   root_->score = 0;
335 
336   if (NULL == root_->first_son)
337     return false;
338 
339   h2f_start_[0] = h2f_num_[0] = 0;
340 
341   if (!build_f2h())
342     return false;
343 
344 #ifdef ___BUILD_MODEL___
345   if (kPrintDebug0) {
346     printf("---SpellingTrie Nodes: %d\n", node_num_);
347   }
348   return build_ym_info();
349 #else
350   return true;
351 #endif
352 }
353 
354 #ifdef ___BUILD_MODEL___
get_ym_str(const char * spl_str)355 const char* SpellingTrie::get_ym_str(const char *spl_str) {
356   bool start_ZCS = false;
357   if (is_shengmu_char(*spl_str)) {
358     if ('Z' == *spl_str || 'C' == *spl_str || 'S' == *spl_str)
359       start_ZCS = true;
360     spl_str += 1;
361     if (start_ZCS && 'h' == *spl_str)
362       spl_str += 1;
363   }
364   return spl_str;
365 }
366 
build_ym_info()367 bool SpellingTrie::build_ym_info() {
368   bool sucess;
369   SpellingTable *spl_table = new SpellingTable();
370 
371   sucess = spl_table->init_table(kMaxPinyinSize - 1, 2 * kMaxYmNum, false);
372   assert(sucess);
373 
374   for (uint16 pos = 0; pos < spelling_num_; pos++) {
375     const char *spl_str = spelling_buf_ + spelling_size_ * pos;
376     spl_str = get_ym_str(spl_str);
377     if ('\0' != spl_str[0]) {
378       sucess = spl_table->put_spelling(spl_str, 0);
379       assert(sucess);
380     }
381   }
382 
383   size_t ym_item_size;  // '\0' is included
384   size_t ym_num;
385   const char* ym_buf;
386   ym_buf = spl_table->arrange(&ym_item_size, &ym_num);
387 
388   if (NULL != ym_buf_)
389     delete [] ym_buf_;
390   ym_buf_ = new char[ym_item_size * ym_num];
391   if (NULL == ym_buf_) {
392     delete spl_table;
393     return false;
394   }
395 
396   memcpy(ym_buf_, ym_buf, sizeof(char) * ym_item_size * ym_num);
397   ym_size_ = ym_item_size;
398   ym_num_ = ym_num;
399 
400   delete spl_table;
401 
402   // Generate the maping from the spelling ids to the Yunmu ids.
403   if (spl_ym_ids_)
404     delete spl_ym_ids_;
405   spl_ym_ids_ = new uint8[spelling_num_ + kFullSplIdStart];
406   if (NULL == spl_ym_ids_)
407     return false;
408 
409   memset(spl_ym_ids_, 0, sizeof(uint8) * (spelling_num_ + kFullSplIdStart));
410 
411   for (uint16 id = 1; id < spelling_num_ + kFullSplIdStart; id++) {
412     const char *str = get_spelling_str(id);
413 
414     str = get_ym_str(str);
415     if ('\0' != str[0]) {
416       uint8 ym_id = get_ym_id(str);
417       spl_ym_ids_[id] = ym_id;
418       assert(ym_id > 0);
419     } else {
420       spl_ym_ids_[id] = 0;
421     }
422   }
423   return true;
424 }
425 #endif
426 
construct_spellings_subset(size_t item_start,size_t item_end,size_t level,SpellingNode * parent)427 SpellingNode* SpellingTrie::construct_spellings_subset(
428     size_t item_start, size_t item_end, size_t level, SpellingNode* parent) {
429   if (level >= spelling_size_ || item_end <= item_start || NULL == parent)
430     return NULL;
431 
432   SpellingNode *first_son = NULL;
433   uint16 num_of_son = 0;
434   unsigned char min_son_score = 255;
435 
436   const char *spelling_last_start = spelling_buf_ + spelling_size_ * item_start;
437   char char_for_node = spelling_last_start[level];
438   assert(char_for_node >= 'A' && char_for_node <= 'Z' ||
439          'h' == char_for_node);
440 
441   // Scan the array to find how many sons
442   for (size_t i = item_start + 1; i < item_end; i++) {
443     const char *spelling_current = spelling_buf_ + spelling_size_ * i;
444     char char_current = spelling_current[level];
445     if (char_current != char_for_node) {
446       num_of_son++;
447       char_for_node = char_current;
448     }
449   }
450   num_of_son++;
451 
452   // Allocate memory
453 #ifdef ___BUILD_MODEL___
454   node_num_ += num_of_son;
455 #endif
456   first_son = new SpellingNode[num_of_son];
457   memset(first_son, 0, sizeof(SpellingNode)*num_of_son);
458 
459   // Now begin construct tree
460   size_t son_pos = 0;
461 
462   spelling_last_start = spelling_buf_ + spelling_size_ * item_start;
463   char_for_node = spelling_last_start[level];
464 
465   bool spelling_endable = true;
466   if (spelling_last_start[level + 1] != '\0')
467     spelling_endable = false;
468 
469   size_t item_start_next = item_start;
470 
471   for (size_t i = item_start + 1; i < item_end; i++) {
472     const char *spelling_current = spelling_buf_ + spelling_size_ * i;
473     char char_current = spelling_current[level];
474     assert(is_valid_spl_char(char_current));
475 
476     if (char_current != char_for_node) {
477       // Construct a node
478       SpellingNode *node_current = first_son + son_pos;
479       node_current->char_this_node = char_for_node;
480 
481       // For quick search in the first level
482       if (0 == level)
483         level1_sons_[char_for_node - 'A'] = node_current;
484 
485       if (spelling_endable) {
486         node_current->spelling_idx = kFullSplIdStart + item_start_next;
487       }
488 
489       if (spelling_last_start[level + 1] != '\0' || i - item_start_next > 1) {
490         size_t real_start = item_start_next;
491         if (spelling_last_start[level + 1] == '\0')
492           real_start++;
493 
494         node_current->first_son =
495             construct_spellings_subset(real_start, i, level + 1,
496                                        node_current);
497 
498         if (real_start == item_start_next + 1) {
499           uint16 score_this = static_cast<unsigned char>(
500               spelling_last_start[spelling_size_ - 1]);
501           if (score_this < node_current->score)
502             node_current->score = score_this;
503         }
504       } else {
505         node_current->first_son = NULL;
506         node_current->score = static_cast<unsigned char>(
507             spelling_last_start[spelling_size_ - 1]);
508       }
509 
510       if (node_current->score < min_son_score)
511         min_son_score = node_current->score;
512 
513       bool is_half = false;
514       if (level == 0 && is_szm_char(char_for_node)) {
515         node_current->spelling_idx =
516           static_cast<uint16>(char_for_node - 'A' + 1);
517 
518         if (char_for_node > 'C')
519           node_current->spelling_idx++;
520         if (char_for_node > 'S')
521           node_current->spelling_idx++;
522 
523         h2f_num_[node_current->spelling_idx] = i - item_start_next;
524         is_half = true;
525       } else if (level == 1 && char_for_node == 'h') {
526         char ch_level0 = spelling_last_start[0];
527         uint16 part_id = 0;
528         if (ch_level0 == 'C')
529           part_id = 'C' - 'A' + 1 + 1;
530         else if (ch_level0 == 'S')
531           part_id = 'S' - 'A' + 1 + 2;
532         else if (ch_level0 == 'Z')
533           part_id = 'Z' - 'A' + 1 + 3;
534         if (0 != part_id) {
535           node_current->spelling_idx = part_id;
536           h2f_num_[node_current->spelling_idx] = i - item_start_next;
537           is_half = true;
538         }
539       }
540 
541       if (is_half) {
542         if (h2f_num_[node_current->spelling_idx] > 0)
543           h2f_start_[node_current->spelling_idx] =
544             item_start_next + kFullSplIdStart;
545         else
546           h2f_start_[node_current->spelling_idx] = 0;
547       }
548 
549       // for next sibling
550       spelling_last_start = spelling_current;
551       char_for_node = char_current;
552       item_start_next = i;
553       spelling_endable = true;
554       if (spelling_current[level + 1] != '\0')
555         spelling_endable = false;
556 
557       son_pos++;
558     }
559   }
560 
561   // the last one
562   SpellingNode *node_current = first_son + son_pos;
563   node_current->char_this_node = char_for_node;
564 
565   // For quick search in the first level
566   if (0 == level)
567     level1_sons_[char_for_node - 'A'] = node_current;
568 
569   if (spelling_endable) {
570     node_current->spelling_idx = kFullSplIdStart + item_start_next;
571   }
572 
573   if (spelling_last_start[level + 1] != '\0' ||
574       item_end - item_start_next > 1) {
575     size_t real_start = item_start_next;
576     if (spelling_last_start[level + 1] == '\0')
577       real_start++;
578 
579     node_current->first_son =
580         construct_spellings_subset(real_start, item_end, level + 1,
581                                    node_current);
582 
583     if (real_start == item_start_next + 1) {
584       uint16 score_this = static_cast<unsigned char>(
585           spelling_last_start[spelling_size_ - 1]);
586       if (score_this < node_current->score)
587         node_current->score = score_this;
588     }
589   } else {
590     node_current->first_son = NULL;
591     node_current->score = static_cast<unsigned char>(
592         spelling_last_start[spelling_size_ - 1]);
593   }
594 
595   if (node_current->score < min_son_score)
596     min_son_score = node_current->score;
597 
598   assert(son_pos + 1 == num_of_son);
599 
600   bool is_half = false;
601   if (level == 0 && szm_is_enabled(char_for_node)) {
602     node_current->spelling_idx = static_cast<uint16>(char_for_node - 'A' + 1);
603 
604     if (char_for_node > 'C')
605       node_current->spelling_idx++;
606     if (char_for_node > 'S')
607       node_current->spelling_idx++;
608 
609     h2f_num_[node_current->spelling_idx] = item_end - item_start_next;
610     is_half = true;
611   } else if (level == 1 && char_for_node == 'h') {
612     char ch_level0 = spelling_last_start[0];
613     uint16 part_id = 0;
614     if (ch_level0 == 'C')
615       part_id = 'C' - 'A' + 1 + 1;
616     else if (ch_level0 == 'S')
617       part_id = 'S' - 'A' + 1 + 2;
618     else if (ch_level0 == 'Z')
619       part_id = 'Z' - 'A' + 1 + 3;
620     if (0 != part_id) {
621       node_current->spelling_idx = part_id;
622       h2f_num_[node_current->spelling_idx] = item_end - item_start_next;
623       is_half = true;
624     }
625   }
626   if (is_half) {
627     if (h2f_num_[node_current->spelling_idx] > 0)
628       h2f_start_[node_current->spelling_idx] =
629         item_start_next + kFullSplIdStart;
630     else
631       h2f_start_[node_current->spelling_idx] = 0;
632   }
633 
634   parent->num_of_son = num_of_son;
635   parent->score = min_son_score;
636   return first_son;
637 }
638 
save_spl_trie(FILE * fp)639 bool SpellingTrie::save_spl_trie(FILE *fp) {
640   if (NULL == fp || NULL == spelling_buf_)
641     return false;
642 
643   if (fwrite(&spelling_size_, sizeof(size_t), 1, fp) != 1)
644     return false;
645 
646   if (fwrite(&spelling_num_, sizeof(size_t), 1, fp) != 1)
647     return false;
648 
649   if (fwrite(&score_amplifier_, sizeof(float), 1, fp) != 1)
650     return false;
651 
652   if (fwrite(&average_score_, sizeof(unsigned char), 1, fp) != 1)
653     return false;
654 
655   if (fwrite(spelling_buf_, sizeof(char) * spelling_size_,
656              spelling_num_, fp) != spelling_num_)
657     return false;
658 
659   return true;
660 }
661 
load_spl_trie(FILE * fp)662 bool SpellingTrie::load_spl_trie(FILE *fp) {
663   if (NULL == fp)
664     return false;
665 
666   if (fread(&spelling_size_, sizeof(size_t), 1, fp) != 1)
667     return false;
668 
669   if (fread(&spelling_num_, sizeof(size_t), 1, fp) != 1)
670     return false;
671 
672   if (fread(&score_amplifier_, sizeof(float), 1, fp) != 1)
673     return false;
674 
675   if (fread(&average_score_, sizeof(unsigned char), 1, fp) != 1)
676     return false;
677 
678   if (NULL != spelling_buf_)
679     delete [] spelling_buf_;
680 
681   spelling_buf_ = new char[spelling_size_ * spelling_num_];
682   if (NULL == spelling_buf_)
683     return false;
684 
685   if (fread(spelling_buf_, sizeof(char) * spelling_size_,
686             spelling_num_, fp) != spelling_num_)
687     return false;
688 
689   return construct(spelling_buf_, spelling_size_, spelling_num_,
690                    score_amplifier_, average_score_);
691 }
692 
build_f2h()693 bool SpellingTrie::build_f2h() {
694   if (NULL != f2h_)
695     delete [] f2h_;
696   f2h_ = new uint16[spelling_num_];
697   if (NULL == f2h_)
698     return false;
699 
700   for (uint16 hid = 0; hid < kFullSplIdStart; hid++) {
701     for (uint16 fid = h2f_start_[hid];
702          fid < h2f_start_[hid] + h2f_num_[hid]; fid++)
703       f2h_[fid - kFullSplIdStart] = hid;
704   }
705 
706   return true;
707 }
708 
get_spelling_num()709 size_t SpellingTrie::get_spelling_num() {
710   return spelling_num_;
711 }
712 
get_ym_id(const char * ym_str)713 uint8 SpellingTrie::get_ym_id(const char *ym_str) {
714   if (NULL == ym_str || NULL == ym_buf_)
715     return 0;
716 
717   for (uint8 pos = 0; pos < ym_num_; pos++)
718     if (strcmp(ym_buf_ + ym_size_ * pos, ym_str) == 0)
719       return pos + 1;
720 
721   return 0;
722 }
723 
get_spelling_str(uint16 splid)724 const char* SpellingTrie::get_spelling_str(uint16 splid) {
725   splstr_queried_[0] = '\0';
726 
727   if (splid >= kFullSplIdStart) {
728     splid -= kFullSplIdStart;
729     snprintf(splstr_queried_, spelling_size_, "%s",
730              spelling_buf_ + splid * spelling_size_);
731   } else {
732     if (splid == 'C' - 'A' + 1 + 1) {
733       snprintf(splstr_queried_, spelling_size_, "%s", "Ch");
734     } else if (splid == 'S' - 'A' + 1 + 2) {
735       snprintf(splstr_queried_, spelling_size_, "%s", "Sh");
736     } else if (splid == 'Z' - 'A' + 1 + 3) {
737       snprintf(splstr_queried_, spelling_size_, "%s", "Zh");
738     } else {
739       if (splid > 'C' - 'A' + 1)
740         splid--;
741       if (splid > 'S' - 'A' + 1)
742         splid--;
743       splstr_queried_[0] = 'A' + splid - 1;
744       splstr_queried_[1] = '\0';
745     }
746   }
747   return splstr_queried_;
748 }
749 
get_spelling_str16(uint16 splid)750 const char16* SpellingTrie::get_spelling_str16(uint16 splid) {
751   splstr16_queried_[0] = '\0';
752 
753   if (splid >= kFullSplIdStart) {
754     splid -= kFullSplIdStart;
755     for (size_t pos = 0; pos < spelling_size_; pos++) {
756       splstr16_queried_[pos] = static_cast<char16>
757           (spelling_buf_[splid * spelling_size_ + pos]);
758     }
759   } else {
760     if (splid == 'C' - 'A' + 1 + 1) {
761       splstr16_queried_[0] = static_cast<char16>('C');
762       splstr16_queried_[1] = static_cast<char16>('h');
763       splstr16_queried_[2] = static_cast<char16>('\0');
764     } else if (splid == 'S' - 'A' + 1 + 2) {
765       splstr16_queried_[0] = static_cast<char16>('S');
766       splstr16_queried_[1] = static_cast<char16>('h');
767       splstr16_queried_[2] = static_cast<char16>('\0');
768     } else if (splid == 'Z' - 'A' + 1 + 3) {
769       splstr16_queried_[0] = static_cast<char16>('Z');
770       splstr16_queried_[1] = static_cast<char16>('h');
771       splstr16_queried_[2] = static_cast<char16>('\0');
772     } else {
773       if (splid > 'C' - 'A' + 1)
774         splid--;
775       if (splid > 'S' - 'A' + 1)
776         splid--;
777       splstr16_queried_[0] = 'A' + splid - 1;
778       splstr16_queried_[1] = '\0';
779     }
780   }
781   return splstr16_queried_;
782 }
783 
get_spelling_str16(uint16 splid,char16 * splstr16,size_t splstr16_len)784 size_t SpellingTrie::get_spelling_str16(uint16 splid, char16 *splstr16,
785                                         size_t splstr16_len) {
786   if (NULL == splstr16 || splstr16_len < kMaxPinyinSize + 1) return 0;
787 
788   if (splid >= kFullSplIdStart) {
789     splid -= kFullSplIdStart;
790     for (size_t pos = 0; pos <= kMaxPinyinSize; pos++) {
791       splstr16[pos] = static_cast<char16>
792           (spelling_buf_[splid * spelling_size_ + pos]);
793       if (static_cast<char16>('\0') == splstr16[pos]) {
794         return pos;
795       }
796     }
797   } else {
798     if (splid == 'C' - 'A' + 1 + 1) {
799       splstr16[0] = static_cast<char16>('C');
800       splstr16[1] = static_cast<char16>('h');
801       splstr16[2] = static_cast<char16>('\0');
802       return 2;
803     } else if (splid == 'S' - 'A' + 1 + 2) {
804       splstr16[0] = static_cast<char16>('S');
805       splstr16[1] = static_cast<char16>('h');
806       splstr16[2] = static_cast<char16>('\0');
807       return 2;
808     } else if (splid == 'Z' - 'A' + 1 + 3) {
809       splstr16[0] = static_cast<char16>('Z');
810       splstr16[1] = static_cast<char16>('h');
811       splstr16[2] = static_cast<char16>('\0');
812       return 2;
813     } else {
814       if (splid > 'C' - 'A' + 1)
815         splid--;
816       if (splid > 'S' - 'A' + 1)
817         splid--;
818       splstr16[0] = 'A' + splid - 1;
819       splstr16[1] = '\0';
820       return 1;
821     }
822   }
823 
824   // Not reachable.
825   return 0;
826 }
827 
828 }  // namespace ime_pinyin
829