• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
/**********************************************************************
 * File:        ratngs.h  (Formerly ratings.h)
 * Description: Definition of the WERD_CHOICE and BLOB_CHOICE classes.
 * Author:      Ray Smith
 * Created:     Thu Apr 23 11:40:38 BST 1992
 *
 * (C) Copyright 1992, Hewlett-Packard Ltd.
 ** Licensed under the Apache License, Version 2.0 (the "License");
 ** you may not use this file except in compliance with the License.
 ** You may obtain a copy of the License at
 ** http://www.apache.org/licenses/LICENSE-2.0
 ** Unless required by applicable law or agreed to in writing, software
 ** distributed under the License is distributed on an "AS IS" BASIS,
 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 ** See the License for the specific language governing permissions and
 ** limitations under the License.
 *
 **********************************************************************/
19 
20 #ifndef           RATNGS_H
21 #define           RATNGS_H
22 
23 #include <assert.h>
24 
25 #include "clst.h"
26 #include "genericvector.h"
27 #include "notdll.h"
28 #include "unichar.h"
29 #include "unicharset.h"
30 #include "werd.h"
31 
32 class BLOB_CHOICE: public ELIST_LINK
33 {
34   public:
BLOB_CHOICE()35     BLOB_CHOICE() {
36       unichar_id_ = INVALID_UNICHAR_ID;
37       config_ = '\0';
38       rating_ = MAX_FLOAT32;
39       certainty_ = -MAX_FLOAT32;
40       script_id_ = -1;
41     }
42     BLOB_CHOICE(UNICHAR_ID src_unichar_id, // character id
43                 float src_rating,          // rating
44                 float src_cert,            // certainty
45                 inT8 src_config,           // config (font)
46                 int script_id);            // script
47     BLOB_CHOICE(const BLOB_CHOICE &other);
~BLOB_CHOICE()48     ~BLOB_CHOICE() {}
unichar_id()49     UNICHAR_ID unichar_id() const {
50       return unichar_id_;
51     }
rating()52     float rating() const {
53       return rating_;
54     }
certainty()55     float certainty() const {
56       return certainty_;
57     }
config()58     inT8 config() const {
59       return config_;
60     }
script_id()61     int script_id() const {
62       return script_id_;
63     }
64 
set_unichar_id(UNICHAR_ID newunichar_id)65     void set_unichar_id(UNICHAR_ID newunichar_id) {
66       unichar_id_ = newunichar_id;
67     }
set_rating(float newrat)68     void set_rating(float newrat) {
69       rating_ = newrat;
70     }
set_certainty(float newrat)71     void set_certainty(float newrat) {
72       certainty_ = newrat;
73     }
set_config(inT8 newfont)74     void set_config(inT8 newfont) {
75       config_ = newfont;
76     }
set_script(int newscript_id)77     void set_script(int newscript_id) {
78       script_id_ = newscript_id;
79     }
80 
deep_copy(const BLOB_CHOICE * src)81     static BLOB_CHOICE* deep_copy(const BLOB_CHOICE* src) {
82       BLOB_CHOICE* choice = new BLOB_CHOICE;
83       *choice = *src;
84       return choice;
85     }
86 
87     NEWDELETE
88  private:
89     UNICHAR_ID unichar_id_;          // unichar id
90     char config_;                    // char config (font)
91     inT16 junk2_;
92     float rating_;                   // size related
93     float certainty_;                // absolute
94     int script_id_;
95 };
96 
97 // Make BLOB_CHOICE listable.
98 ELISTIZEH (BLOB_CHOICE) CLISTIZEH (BLOB_CHOICE_LIST)
99 
// Permuter codes stored in a WERD_CHOICE. The numeric values are spelled
// out explicitly; they match the implicit numbering starting from zero.
enum PermuterType {
  NO_PERM = 0,
  PUNC_PERM = 1,
  TOP_CHOICE_PERM = 2,
  LOWER_CASE_PERM = 3,
  UPPER_CASE_PERM = 4,
  NUMBER_PERM = 5,
  SYSTEM_DAWG_PERM = 6,
  DOC_DAWG_PERM = 7,
  USER_DAWG_PERM = 8,
  FREQ_DAWG_PERM = 9,
  COMPOUND_PERM = 10
};
114 
115 class WERD_CHOICE {
116  public:
WERD_CHOICE()117   WERD_CHOICE() { this->init(8); }
WERD_CHOICE(int reserved)118   WERD_CHOICE(int reserved) { this->init(reserved); }
WERD_CHOICE(const char * src_string,const char * src_lengths,float src_rating,float src_certainty,uinT8 src_permuter,const UNICHARSET & unicharset)119   WERD_CHOICE(const char *src_string,
120               const char *src_lengths,
121               float src_rating,
122               float src_certainty,
123               uinT8 src_permuter,
124               const UNICHARSET &unicharset) {
125     this->init(src_string, src_lengths, src_rating,
126                src_certainty, src_permuter, unicharset);
127   }
128   WERD_CHOICE (const char *src_string, const UNICHARSET &unicharset);
WERD_CHOICE(const WERD_CHOICE & word)129   WERD_CHOICE(const WERD_CHOICE &word) {
130     this->init(word.length());
131     this->operator=(word);
132   }
133   ~WERD_CHOICE();
134 
length()135   inline int length() const {
136     return length_;
137   }
unichar_ids()138   inline const UNICHAR_ID *unichar_ids() const {
139     return unichar_ids_;
140   }
unichar_id(int index)141   inline const UNICHAR_ID unichar_id(int index) const {
142     assert(index < length_);
143     return unichar_ids_[index];
144   }
fragment_lengths()145   inline const char *fragment_lengths() const {
146     return fragment_lengths_;
147   }
fragment_length(int index)148   inline const char fragment_length(int index) const {
149     assert(index < length_);
150     return fragment_lengths_[index];
151   }
rating()152   inline float rating() const {
153     return rating_;
154   }
certainty()155   inline float certainty() const {
156     return certainty_;
157   }
permuter()158   inline uinT8 permuter() const {
159     return permuter_;
160   }
fragment_mark()161   inline bool fragment_mark() const {
162     return fragment_mark_;
163   }
blob_choices()164   inline BLOB_CHOICE_LIST_CLIST* blob_choices() {
165     return blob_choices_;
166   }
set_unichar_id(UNICHAR_ID unichar_id,int index)167   inline void set_unichar_id(UNICHAR_ID unichar_id, int index) {
168     assert(index < length_);
169     unichar_ids_[index] = unichar_id;
170   }
set_rating(float new_val)171   inline void set_rating(float new_val) {
172     rating_ = new_val;
173   }
set_certainty(float new_val)174   inline void set_certainty(float new_val) {
175     certainty_ = new_val;
176   }
set_permuter(uinT8 perm)177   inline void set_permuter(uinT8 perm) {
178     permuter_ = perm;
179   }
set_fragment_mark(bool new_fragment_mark)180   inline void set_fragment_mark(bool new_fragment_mark) {
181     fragment_mark_ = new_fragment_mark;
182   }
183   void set_blob_choices(BLOB_CHOICE_LIST_CLIST *blob_choices);
184 
185   // Make more space in unichar_id_ and fragment_lengths_ arrays.
double_the_size()186   inline void double_the_size() {
187     unichar_ids_ = GenericVector<UNICHAR_ID>::double_the_size_memcpy(
188         reserved_, unichar_ids_);
189     fragment_lengths_ = GenericVector<char>::double_the_size_memcpy(
190         reserved_, fragment_lengths_);
191     reserved_ *= 2;
192   }
193 
194   // Initializes WERD_CHOICE - reseves length slots in unichar_ids_ and
195   // fragment_length_ arrays. Sets other values to default (blank) values.
init(int reserved)196   inline void init(int reserved) {
197     reserved_ = reserved;
198     unichar_ids_ = new UNICHAR_ID[reserved];
199     fragment_lengths_ = new char[reserved];
200     length_ = 0;
201     rating_ = 0.0;
202     certainty_ = MAX_FLOAT32;
203     permuter_ = NO_PERM;
204     fragment_mark_ = false;
205     blob_choices_ = NULL;
206     unichar_string_ = "";
207     unichar_lengths_ = "";
208   }
209 
210   // Helper function to build a WERD_CHOICE from the given string,
211   // fragment lengths, rating, certainty and permuter.
212   // The function assumes that src_string is not NULL.
213   // src_lengths argument could be NULL, in which case the unichars
214   // in src_string are assumed to all be of length 1.
215   void init(const char *src_string, const char *src_lengths,
216             float src_rating, float src_certainty,
217             uinT8 src_permuter, const UNICHARSET &current_unicharset);
218 
219   // Set the fields in this choice to be default (bad) values.
make_bad()220   inline void make_bad() {
221     length_ = 0;
222     rating_ = MAX_FLOAT32;
223     certainty_ = -MAX_FLOAT32;
224     fragment_mark_ = false;
225     unichar_string_ = "";
226     unichar_lengths_ = "";
227   }
228 
229   // This function assumes that there is enough space reserved
230   // in the WERD_CHOICE for adding another unichar.
231   // This is an efficient alternative to append_unichar_id().
append_unichar_id_space_allocated(UNICHAR_ID unichar_id,char fragment_length,float rating,float certainty)232   inline void append_unichar_id_space_allocated(
233       UNICHAR_ID unichar_id, char fragment_length,
234       float rating, float certainty) {
235     assert(reserved_ > length_);
236     length_++;
237     this->set_unichar_id(unichar_id, fragment_length,
238                          rating, certainty, length_-1);
239   }
240 
241   void append_unichar_id(UNICHAR_ID unichar_id, char fragment_length,
242                          float rating, float certainty);
243 
set_unichar_id(UNICHAR_ID unichar_id,char fragment_length,float rating,float certainty,int index)244   inline void set_unichar_id(UNICHAR_ID unichar_id, char fragment_length,
245                              float rating, float certainty, int index) {
246     assert(index < length_);
247     unichar_ids_[index] = unichar_id;
248     fragment_lengths_[index] = fragment_length;
249     rating_ += rating;
250     if (certainty < certainty_) {
251       certainty_ = certainty;
252     }
253   }
254 
255   bool contains_unichar_id(UNICHAR_ID unichar_id) const;
256   void remove_unichar_ids(int index, int num);
remove_last_unichar_id()257   inline void remove_last_unichar_id() { --length_; }
remove_unichar_id(int index)258   inline void remove_unichar_id(int index) { this->remove_unichar_ids(index, 1); }
259   void string_and_lengths(const UNICHARSET &current_unicharset,
260                           STRING *word_str, STRING *word_lengths_str) const;
debug_string(const UNICHARSET & current_unicharset)261   const STRING debug_string(const UNICHARSET &current_unicharset) const {
262     STRING word_str;
263     for (int i = 0; i < length_; ++i) {
264       word_str += current_unicharset.debug_str(unichar_ids_[i]);
265       word_str += " ";
266     }
267     return word_str;
268   }
269   // Since this function walks over the whole word to convert unichar ids
270   // to unichars, it is best to call it once, e.g. after all changes to
271   // unichar_ids_ in WERD_CHOICE are finished.
populate_unichars(const UNICHARSET & current_unicharset)272   void populate_unichars(const UNICHARSET &current_unicharset) {
273     this->string_and_lengths(current_unicharset, &unichar_string_,
274                              &unichar_lengths_);
275   }
276   // This function should only be called if populate_unichars()
277   // was called and WERD_CHOICE did not change since then.
unichar_string()278   const STRING &unichar_string() const {
279     assert(unichar_string_.length() <= 0 ||
280            unichar_string_.length() >= length_);  // sanity check
281     return unichar_string_;
282   }
283   // This function should only be called if populate_unichars()
284   // was called and WERD_CHOICE did not change since then.
unichar_lengths()285   const STRING &unichar_lengths() const {
286     assert(unichar_lengths_.length() <= 0 ||
287            unichar_lengths_.length() == length_);  // sanity check
288     return unichar_lengths_;
289   }
print()290   const void print() const { this->print(""); }
291   const void print(const char *msg) const;
292 
293   WERD_CHOICE& operator+= (     // concatanate
294     const WERD_CHOICE & second);// second on first
295 
296   WERD_CHOICE& operator= (const WERD_CHOICE& source);
297 
298   NEWDELETE private:
299   UNICHAR_ID *unichar_ids_;  // unichar ids that represent the text of the word
300   char *fragment_lengths_;   // number of fragments in each unichar
301   int reserved_;             // size of the above arrays
302   int length_;               // word length
303   float rating_;             // size related
304   float certainty_;          // absolute
305   uinT8 permuter_;           // permuter code
306   bool fragment_mark_;       // if true, indicates that this choice
307                              // was chosen over a better one that
308                              // contained a fragment
309   BLOB_CHOICE_LIST_CLIST *blob_choices_;  // best choices for each blob
310 
311   // The following variables are only populated by calling populate_unichars().
312   // They are not synchronized with the values in unichar_ids otherwise.
313   STRING unichar_string_;
314   STRING unichar_lengths_;
315   bool unichar_info_present;
316 
317  private:
318   void delete_blob_choices();
319 };
320 
321 // Make WERD_CHOICE listable.
322 ELISTIZEH (WERD_CHOICE)
323 typedef GenericVector<BLOB_CHOICE_LIST *> BLOB_CHOICE_LIST_VECTOR;
324 typedef GenericVector<WERD_CHOICE_LIST *> WERD_CHOICE_LIST_VECTOR;
325 
326 typedef void (*POLY_TESTER) (const STRING&, PBLOB *, DENORM *, BOOL8,
327                              char *, inT32, BLOB_CHOICE_LIST *);
328 
329 void print_ratings_list(const char *msg, BLOB_CHOICE_LIST *ratings);
330 void print_ratings_list(
331     const char *msg,                      // intro message
332     BLOB_CHOICE_LIST *ratings,            // list of results
333     const UNICHARSET &current_unicharset  // unicharset that can be used
334                                           // for id-to-unichar conversion
335     );
336 void print_ratings_info(
337     FILE *fp,                             // file to use
338     BLOB_CHOICE_LIST *ratings,            // list of results
339     const UNICHARSET &current_unicharset  // unicharset that can be used
340                                           // for id-to-unichar conversion
341     );
342 void print_char_choices_list(
343     const char *msg,
344     const BLOB_CHOICE_LIST_VECTOR &char_choices,
345     const UNICHARSET &current_unicharset,
346     BOOL8 detailed
347     );
348 
349 #endif
350