1 /********************************************************************** 2 * File: ratngs.h (Formerly ratings.h) 3 * Description: Definition of the WERD_CHOICE and BLOB_CHOICE classes. 4 * Author: Ray Smith 5 * Created: Thu Apr 23 11:40:38 BST 1992 6 * 7 * (C) Copyright 1992, Hewlett-Packard Ltd. 8 ** Licensed under the Apache License, Version 2.0 (the "License"); 9 ** you may not use this file except in compliance with the License. 10 ** You may obtain a copy of the License at 11 ** http://www.apache.org/licenses/LICENSE-2.0 12 ** Unless required by applicable law or agreed to in writing, software 13 ** distributed under the License is distributed on an "AS IS" BASIS, 14 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 ** See the License for the specific language governing permissions and 16 ** limitations under the License. 17 * 18 **********************************************************************/ 19 20 #ifndef RATNGS_H 21 #define RATNGS_H 22 23 #include <assert.h> 24 25 #include "clst.h" 26 #include "genericvector.h" 27 #include "notdll.h" 28 #include "unichar.h" 29 #include "unicharset.h" 30 #include "werd.h" 31 32 class BLOB_CHOICE: public ELIST_LINK 33 { 34 public: BLOB_CHOICE()35 BLOB_CHOICE() { 36 unichar_id_ = INVALID_UNICHAR_ID; 37 config_ = '\0'; 38 rating_ = MAX_FLOAT32; 39 certainty_ = -MAX_FLOAT32; 40 script_id_ = -1; 41 } 42 BLOB_CHOICE(UNICHAR_ID src_unichar_id, // character id 43 float src_rating, // rating 44 float src_cert, // certainty 45 inT8 src_config, // config (font) 46 int script_id); // script 47 BLOB_CHOICE(const BLOB_CHOICE &other); ~BLOB_CHOICE()48 ~BLOB_CHOICE() {} unichar_id()49 UNICHAR_ID unichar_id() const { 50 return unichar_id_; 51 } rating()52 float rating() const { 53 return rating_; 54 } certainty()55 float certainty() const { 56 return certainty_; 57 } config()58 inT8 config() const { 59 return config_; 60 } script_id()61 int script_id() const { 62 return script_id_; 63 } 64 set_unichar_id(UNICHAR_ID newunichar_id)65 void set_unichar_id(UNICHAR_ID newunichar_id) { 66 unichar_id_ = newunichar_id; 67 } set_rating(float newrat)68 void set_rating(float newrat) { 69 rating_ = newrat; 70 } set_certainty(float newrat)71 void set_certainty(float newrat) { 72 certainty_ = newrat; 73 } set_config(inT8 newfont)74 void set_config(inT8 newfont) { 75 config_ = newfont; 76 } set_script(int newscript_id)77 void set_script(int newscript_id) { 78 script_id_ = newscript_id; 79 } 80 deep_copy(const BLOB_CHOICE * src)81 static BLOB_CHOICE* deep_copy(const BLOB_CHOICE* src) { 82 BLOB_CHOICE* choice = new BLOB_CHOICE; 83 *choice = *src; 84 return choice; 85 } 86 87 NEWDELETE 88 private: 89 UNICHAR_ID unichar_id_; // unichar id 90 char config_; // char config (font) 91 inT16 junk2_; 92 float rating_; // size related 93 float certainty_; // absolute 94 int script_id_; 95 }; 96 97 // Make BLOB_CHOICE listable. 98 ELISTIZEH (BLOB_CHOICE) CLISTIZEH (BLOB_CHOICE_LIST) 99 100 // Permuter codes used in WERD_CHOICEs. 101 enum PermuterType { 102 NO_PERM, // 0 103 PUNC_PERM, // 1 104 TOP_CHOICE_PERM, // 2 105 LOWER_CASE_PERM, // 3 106 UPPER_CASE_PERM, // 4 107 NUMBER_PERM, // 5 108 SYSTEM_DAWG_PERM, // 6 109 DOC_DAWG_PERM, // 7 110 USER_DAWG_PERM, // 8 111 FREQ_DAWG_PERM, // 9 112 COMPOUND_PERM, // 10 113 }; 114 115 class WERD_CHOICE { 116 public: WERD_CHOICE()117 WERD_CHOICE() { this->init(8); } WERD_CHOICE(int reserved)118 WERD_CHOICE(int reserved) { this->init(reserved); } WERD_CHOICE(const char * src_string,const char * src_lengths,float src_rating,float src_certainty,uinT8 src_permuter,const UNICHARSET & unicharset)119 WERD_CHOICE(const char *src_string, 120 const char *src_lengths, 121 float src_rating, 122 float src_certainty, 123 uinT8 src_permuter, 124 const UNICHARSET &unicharset) { 125 this->init(src_string, src_lengths, src_rating, 126 src_certainty, src_permuter, unicharset); 127 } 128 WERD_CHOICE (const char *src_string, const UNICHARSET &unicharset); WERD_CHOICE(const WERD_CHOICE & word)129 WERD_CHOICE(const WERD_CHOICE &word) { 130 this->init(word.length()); 131 this->operator=(word); 132 } 133 ~WERD_CHOICE(); 134 length()135 inline int length() const { 136 return length_; 137 } unichar_ids()138 inline const UNICHAR_ID *unichar_ids() const { 139 return unichar_ids_; 140 } unichar_id(int index)141 inline const UNICHAR_ID unichar_id(int index) const { 142 assert(index < length_); 143 return unichar_ids_[index]; 144 } fragment_lengths()145 inline const char *fragment_lengths() const { 146 return fragment_lengths_; 147 } fragment_length(int index)148 inline const char fragment_length(int index) const { 149 assert(index < length_); 150 return fragment_lengths_[index]; 151 } rating()152 inline float rating() const { 153 return rating_; 154 } certainty()155 inline float certainty() const { 156 return certainty_; 157 } permuter()158 inline uinT8 permuter() const { 159 return permuter_; 160 } fragment_mark()161 inline bool fragment_mark() const { 162 return fragment_mark_; 163 } blob_choices()164 inline BLOB_CHOICE_LIST_CLIST* blob_choices() { 165 return blob_choices_; 166 } set_unichar_id(UNICHAR_ID unichar_id,int index)167 inline void set_unichar_id(UNICHAR_ID unichar_id, int index) { 168 assert(index < length_); 169 unichar_ids_[index] = unichar_id; 170 } set_rating(float new_val)171 inline void set_rating(float new_val) { 172 rating_ = new_val; 173 } set_certainty(float new_val)174 inline void set_certainty(float new_val) { 175 certainty_ = new_val; 176 } set_permuter(uinT8 perm)177 inline void set_permuter(uinT8 perm) { 178 permuter_ = perm; 179 } set_fragment_mark(bool new_fragment_mark)180 inline void set_fragment_mark(bool new_fragment_mark) { 181 fragment_mark_ = new_fragment_mark; 182 } 183 void set_blob_choices(BLOB_CHOICE_LIST_CLIST *blob_choices); 184 185 // Make more space in unichar_id_ and fragment_lengths_ arrays. double_the_size()186 inline void double_the_size() { 187 unichar_ids_ = GenericVector<UNICHAR_ID>::double_the_size_memcpy( 188 reserved_, unichar_ids_); 189 fragment_lengths_ = GenericVector<char>::double_the_size_memcpy( 190 reserved_, fragment_lengths_); 191 reserved_ *= 2; 192 } 193 194 // Initializes WERD_CHOICE - reseves length slots in unichar_ids_ and 195 // fragment_length_ arrays. Sets other values to default (blank) values. init(int reserved)196 inline void init(int reserved) { 197 reserved_ = reserved; 198 unichar_ids_ = new UNICHAR_ID[reserved]; 199 fragment_lengths_ = new char[reserved]; 200 length_ = 0; 201 rating_ = 0.0; 202 certainty_ = MAX_FLOAT32; 203 permuter_ = NO_PERM; 204 fragment_mark_ = false; 205 blob_choices_ = NULL; 206 unichar_string_ = ""; 207 unichar_lengths_ = ""; 208 } 209 210 // Helper function to build a WERD_CHOICE from the given string, 211 // fragment lengths, rating, certainty and permuter. 212 // The function assumes that src_string is not NULL. 213 // src_lengths argument could be NULL, in which case the unichars 214 // in src_string are assumed to all be of length 1. 215 void init(const char *src_string, const char *src_lengths, 216 float src_rating, float src_certainty, 217 uinT8 src_permuter, const UNICHARSET ¤t_unicharset); 218 219 // Set the fields in this choice to be default (bad) values. make_bad()220 inline void make_bad() { 221 length_ = 0; 222 rating_ = MAX_FLOAT32; 223 certainty_ = -MAX_FLOAT32; 224 fragment_mark_ = false; 225 unichar_string_ = ""; 226 unichar_lengths_ = ""; 227 } 228 229 // This function assumes that there is enough space reserved 230 // in the WERD_CHOICE for adding another unichar. 231 // This is an efficient alternative to append_unichar_id(). append_unichar_id_space_allocated(UNICHAR_ID unichar_id,char fragment_length,float rating,float certainty)232 inline void append_unichar_id_space_allocated( 233 UNICHAR_ID unichar_id, char fragment_length, 234 float rating, float certainty) { 235 assert(reserved_ > length_); 236 length_++; 237 this->set_unichar_id(unichar_id, fragment_length, 238 rating, certainty, length_-1); 239 } 240 241 void append_unichar_id(UNICHAR_ID unichar_id, char fragment_length, 242 float rating, float certainty); 243 set_unichar_id(UNICHAR_ID unichar_id,char fragment_length,float rating,float certainty,int index)244 inline void set_unichar_id(UNICHAR_ID unichar_id, char fragment_length, 245 float rating, float certainty, int index) { 246 assert(index < length_); 247 unichar_ids_[index] = unichar_id; 248 fragment_lengths_[index] = fragment_length; 249 rating_ += rating; 250 if (certainty < certainty_) { 251 certainty_ = certainty; 252 } 253 } 254 255 bool contains_unichar_id(UNICHAR_ID unichar_id) const; 256 void remove_unichar_ids(int index, int num); remove_last_unichar_id()257 inline void remove_last_unichar_id() { --length_; } remove_unichar_id(int index)258 inline void remove_unichar_id(int index) { this->remove_unichar_ids(index, 1); } 259 void string_and_lengths(const UNICHARSET ¤t_unicharset, 260 STRING *word_str, STRING *word_lengths_str) const; debug_string(const UNICHARSET & current_unicharset)261 const STRING debug_string(const UNICHARSET ¤t_unicharset) const { 262 STRING word_str; 263 for (int i = 0; i < length_; ++i) { 264 word_str += current_unicharset.debug_str(unichar_ids_[i]); 265 word_str += " "; 266 } 267 return word_str; 268 } 269 // Since this function walks over the whole word to convert unichar ids 270 // to unichars, it is best to call it once, e.g. after all changes to 271 // unichar_ids_ in WERD_CHOICE are finished. populate_unichars(const UNICHARSET & current_unicharset)272 void populate_unichars(const UNICHARSET ¤t_unicharset) { 273 this->string_and_lengths(current_unicharset, &unichar_string_, 274 &unichar_lengths_); 275 } 276 // This function should only be called if populate_unichars() 277 // was called and WERD_CHOICE did not change since then. unichar_string()278 const STRING &unichar_string() const { 279 assert(unichar_string_.length() <= 0 || 280 unichar_string_.length() >= length_); // sanity check 281 return unichar_string_; 282 } 283 // This function should only be called if populate_unichars() 284 // was called and WERD_CHOICE did not change since then. unichar_lengths()285 const STRING &unichar_lengths() const { 286 assert(unichar_lengths_.length() <= 0 || 287 unichar_lengths_.length() == length_); // sanity check 288 return unichar_lengths_; 289 } print()290 const void print() const { this->print(""); } 291 const void print(const char *msg) const; 292 293 WERD_CHOICE& operator+= ( // concatanate 294 const WERD_CHOICE & second);// second on first 295 296 WERD_CHOICE& operator= (const WERD_CHOICE& source); 297 298 NEWDELETE private: 299 UNICHAR_ID *unichar_ids_; // unichar ids that represent the text of the word 300 char *fragment_lengths_; // number of fragments in each unichar 301 int reserved_; // size of the above arrays 302 int length_; // word length 303 float rating_; // size related 304 float certainty_; // absolute 305 uinT8 permuter_; // permuter code 306 bool fragment_mark_; // if true, indicates that this choice 307 // was chosen over a better one that 308 // contained a fragment 309 BLOB_CHOICE_LIST_CLIST *blob_choices_; // best choices for each blob 310 311 // The following variables are only populated by calling populate_unichars(). 312 // They are not synchronized with the values in unichar_ids otherwise. 313 STRING unichar_string_; 314 STRING unichar_lengths_; 315 bool unichar_info_present; 316 317 private: 318 void delete_blob_choices(); 319 }; 320 321 // Make WERD_CHOICE listable. 322 ELISTIZEH (WERD_CHOICE) 323 typedef GenericVector<BLOB_CHOICE_LIST *> BLOB_CHOICE_LIST_VECTOR; 324 typedef GenericVector<WERD_CHOICE_LIST *> WERD_CHOICE_LIST_VECTOR; 325 326 typedef void (*POLY_TESTER) (const STRING&, PBLOB *, DENORM *, BOOL8, 327 char *, inT32, BLOB_CHOICE_LIST *); 328 329 void print_ratings_list(const char *msg, BLOB_CHOICE_LIST *ratings); 330 void print_ratings_list( 331 const char *msg, // intro message 332 BLOB_CHOICE_LIST *ratings, // list of results 333 const UNICHARSET ¤t_unicharset // unicharset that can be used 334 // for id-to-unichar conversion 335 ); 336 void print_ratings_info( 337 FILE *fp, // file to use 338 BLOB_CHOICE_LIST *ratings, // list of results 339 const UNICHARSET ¤t_unicharset // unicharset that can be used 340 // for id-to-unichar conversion 341 ); 342 void print_char_choices_list( 343 const char *msg, 344 const BLOB_CHOICE_LIST_VECTOR &char_choices, 345 const UNICHARSET ¤t_unicharset, 346 BOOL8 detailed 347 ); 348 349 #endif 350