/*
 * Copyright (C) 2009 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

17 #ifndef PINYINIME_ANDPY_INCLUDE_SEARCHCOMMON_H__
18 #define PINYINIME_ANDPY_INCLUDE_SEARCHCOMMON_H__
19 
20 #include <stdlib.h>
21 #include "./spellingtrie.h"
22 
23 namespace ime_pinyin {
24 
25 // Type used to identify the size of a pool, such as id pool, etc.
26 typedef uint16 PoolPosType;
27 
28 // Type used to identify a parsing mile stone in an atom dictionary.
29 typedef uint16 MileStoneHandle;
30 
31 // Type used to express a lemma and its probability score.
32 typedef struct {
33   size_t id:(kLemmaIdSize * 8);
34   size_t lma_len:4;
35   uint16 psb;  // The score, the lower psb, the higher possibility.
36   // For single character items, we may also need Hanzi.
37   // For multiple characer items, ignore it.
38   char16 hanzi;
39 } LmaPsbItem, *PLmaPsbItem;
40 
41 // LmaPsbItem extended with string.
42 typedef struct {
43   LmaPsbItem lpi;
44   char16 str[kMaxLemmaSize + 1];
45 } LmaPsbStrItem, *PLmaPsbStrItem;
46 
47 
48 typedef struct {
49   float psb;
50   char16 pre_hzs[kMaxPredictSize];
51   uint16 his_len;  // The length of the history used to do the prediction.
52 } NPredictItem, *PNPredictItem;
53 
54 // Parameter structure used to extend in a dictionary. All dictionaries
55 // receives the same DictExtPara and a dictionary specific MileStoneHandle for
56 // extending.
57 //
58 // When the user inputs a new character, AtomDictBase::extend_dict() will be
59 // called at least once for each dictionary.
60 //
61 // For example, when the user inputs "wm", extend_dict() will be called twice,
62 // and the DictExtPara parameter are as follows respectively:
63 // 1. splids = {w, m}; splids_extended = 1; ext_len = 1; step_no = 1;
64 // splid_end_split = false; id_start = wa(the first id start with 'w');
65 // id_num = number of ids starting with 'w'.
66 // 2. splids = {m}; splids_extended = 0; ext_len = 1; step_no = 1;
67 // splid_end_split = false; id_start = wa; id_num = number of ids starting with
68 // 'w'.
69 //
70 // For string "women", one of the cases of the DictExtPara parameter is:
71 // splids = {wo, men}, splids_extended = 1, ext_len = 3 (length of "men"),
72 // step_no = 4; splid_end_split = false; id_start = men, id_num = 1.
73 //
74 typedef struct {
75   // Spelling ids for extending, there are splids_extended + 1 ids in the
76   // buffer.
77   // For a normal lemma, there can only be kMaxLemmaSize spelling ids in max,
78   // but for a composing phrase, there can kMaxSearchSteps spelling ids.
79   uint16 splids[kMaxSearchSteps];
80 
81   // Number of ids that have been used before. splids[splids_extended] is the
82   // newly added id for the current extension.
83   uint16 splids_extended;
84 
85   // The step span of the extension. It is also the size of the string for
86   // the newly added spelling id.
87   uint16 ext_len;
88 
89   // The step number for the current extension. It is also the ending position
90   // in the input Pinyin string for the substring of spelling ids in splids[].
91   // For example, when the user inputs "women", step_no = 4.
92   // This parameter may useful to manage the MileStoneHandle list for each
93   // step. When the user deletes a character from the string, MileStoneHandle
94   // objects for the the steps after that character should be reset; when the
95   // user begins a new string, all MileStoneHandle objects should be reset.
96   uint16 step_no;
97 
98   // Indicate whether the newly added spelling ends with a splitting character
99   bool splid_end_split;
100 
101   // If the newly added id is a half id, id_start is the first id of the
102   // corresponding full ids; if the newly added id is a full id, id_start is
103   // that id.
104   uint16 id_start;
105 
106   // If the newly added id is a half id, id_num is the number of corresponding
107   // ids; if it is a full id, id_num == 1.
108   uint16 id_num;
109 }DictExtPara, *PDictExtPara;
110 
111 bool is_system_lemma(LemmaIdType lma_id);
112 bool is_user_lemma(LemmaIdType lma_id);
113 bool is_composing_lemma(LemmaIdType lma_id);
114 
// Comparison callbacks. Their int(const void *, const void *) signature
// matches the comparator type taken by qsort() and bsearch().
// NOTE(review): the "lpi" in the names suggests p1 and p2 point to LmaPsbItem
// objects, ordered by the field named in the suffix (psb, id or hanzi). What
// "unified" psb means is not visible here. Confirm element type, sort
// direction and tie-breaking against the implementation.
int cmp_lpi_with_psb(const void *p1, const void *p2);
int cmp_lpi_with_unified_psb(const void *p1, const void *p2);
int cmp_lpi_with_id(const void *p1, const void *p2);
int cmp_lpi_with_hanzi(const void *p1, const void *p2);

// Same comparator signature as above.
// NOTE(review): "lpsi" suggests p1 and p2 point to LmaPsbStrItem objects
// compared by their str member -- confirm against the implementation.
int cmp_lpsi_with_str(const void *p1, const void *p2);

// A family of eight comparison callbacks with the qsort()/bsearch()
// comparator signature.
// NOTE(review): the numeric suffix presumably is the number of Hanzi
// characters (char16 elements) each variant compares -- confirm against the
// implementation before relying on it.
int cmp_hanzis_1(const void *p1, const void *p2);
int cmp_hanzis_2(const void *p1, const void *p2);
int cmp_hanzis_3(const void *p1, const void *p2);
int cmp_hanzis_4(const void *p1, const void *p2);
int cmp_hanzis_5(const void *p1, const void *p2);
int cmp_hanzis_6(const void *p1, const void *p2);
int cmp_hanzis_7(const void *p1, const void *p2);
int cmp_hanzis_8(const void *p1, const void *p2);

// Comparison callbacks with the qsort()/bsearch() comparator signature.
// NOTE(review): "npre" suggests p1 and p2 point to NPredictItem objects,
// ordered by score (psb) alone, by history length (his_len) then score, or by
// Hanzi string (pre_hzs) then score. Confirm the sort directions and key order
// against the implementation.
int cmp_npre_by_score(const void *p1, const void *p2);
int cmp_npre_by_hislen_score(const void *p1, const void *p2);
int cmp_npre_by_hanzi_score(const void *p1, const void *p2);


136 size_t remove_duplicate_npre(NPredictItem *npre_items, size_t npre_num);
137 
138 size_t align_to_size_t(size_t size);
139 
140 }  // namespace
141 
142 #endif  // PINYINIME_ANDPY_INCLUDE_SEARCHCOMMON_H__
143