/**********************************************************************
 * File:        rejctmap.h  (Formerly rejmap.h)
 * Description: REJ and REJMAP class functions.
 * Author:      Phil Cheatle
 * Created:     Thu Jun  9 13:46:38 BST 1994
 *
 * (C) Copyright 1994, Hewlett-Packard Ltd.
 ** Licensed under the Apache License, Version 2.0 (the "License");
 ** you may not use this file except in compliance with the License.
 ** You may obtain a copy of the License at
 ** http://www.apache.org/licenses/LICENSE-2.0
 ** Unless required by applicable law or agreed to in writing, software
 ** distributed under the License is distributed on an "AS IS" BASIS,
 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 ** See the License for the specific language governing permissions and
 ** limitations under the License.
 *

This module may look unnecessarily verbose, but here's the philosophy...

ALL processing of the reject map is done in this module. There are lots of
separate calls to set reject/accept flags. These have DELIBERATELY been kept
distinct so that this module can decide what to do.

Basically, there is a flag for each sort of rejection or acceptance. This
provides a history of what has happened to EACH character.

Determining whether a character is CURRENTLY rejected depends on implicit
understanding of the SEQUENCE of possible calls. The flags are defined and
grouped in the REJ_FLAGS enum. These groupings are used in determining a
character's CURRENT rejection status. Basically, a character is ACCEPTED if

    none of the permanent rej flags are set
  AND (    the character has never been rejected
      OR an accept flag is set which is LATER than the latest reject flag )

IT IS FUNDAMENTAL THAT ANYONE HACKING THIS CODE UNDERSTANDS THE SIGNIFICANCE
OF THIS IMPLIED TEMPORAL ORDERING OF THE FLAGS!!!!
39 **********************************************************************/ 40 41 #ifndef REJCTMAP_H 42 #define REJCTMAP_H 43 44 #ifdef __UNIX__ 45 #include <assert.h> 46 #endif 47 #include "memry.h" 48 #include "bits16.h" 49 #include "varable.h" 50 #include "notdll.h" 51 52 extern BOOL_VAR_H (rejword_only_set_if_accepted, TRUE, 53 "Mimic old reject_word"); 54 extern BOOL_VAR_H (rejmap_allow_more_good_qual, FALSE, 55 "Use initial good qual setting"); 56 extern BOOL_VAR_H (rej_use_1Il_rej, TRUE, "1Il rejection enabled"); 57 58 enum REJ_FLAGS 59 { 60 /* Reject modes which are NEVER overridden */ 61 R_TESS_FAILURE, // PERM Tess didnt classify 62 R_SMALL_XHT, // PERM Xht too small 63 R_EDGE_CHAR, // PERM Too close to edge of image 64 R_1IL_CONFLICT, // PERM 1Il confusion 65 R_POSTNN_1IL, // PERM 1Il unrejected by NN 66 R_REJ_CBLOB, // PERM Odd blob 67 R_MM_REJECT, // PERM Matrix match rejection (m's) 68 R_BAD_REPETITION, // TEMP Repeated char which doesn't match trend 69 70 /* Initial reject modes (pre NN_ACCEPT) */ 71 R_POOR_MATCH, // TEMP Ray's original heuristic (Not used) 72 R_NOT_TESS_ACCEPTED, // TEMP Tess didnt accept WERD 73 R_CONTAINS_BLANKS, // TEMP Tess failed on other chs in WERD 74 R_BAD_PERMUTER, // POTENTIAL Bad permuter for WERD 75 76 /* Reject modes generated after NN_ACCEPT but before MM_ACCEPT */ 77 R_HYPHEN, // TEMP Post NN dodgy hyphen or full stop 78 R_DUBIOUS, // TEMP Post NN dodgy chars 79 R_NO_ALPHANUMS, // TEMP No alphanumerics in word after NN 80 R_MOSTLY_REJ, // TEMP Most of word rejected so rej the rest 81 R_XHT_FIXUP, // TEMP Xht tests unsure 82 83 /* Reject modes generated after MM_ACCEPT but before QUALITY_ACCEPT */ 84 R_BAD_QUALITY, // TEMP Quality metrics bad for WERD 85 86 /* Reject modes generated after QUALITY_ACCEPT but before MINIMAL_REJ accep*/ 87 R_DOC_REJ, // TEMP Document rejection 88 R_BLOCK_REJ, // TEMP Block rejection 89 R_ROW_REJ, // TEMP Row rejection 90 R_UNLV_REJ, // TEMP ~ turned to - or ^ turned to space 91 92 /* 
Accept modes which occur inbetween the above rejection groups */ 93 R_NN_ACCEPT, //NN acceptance 94 R_HYPHEN_ACCEPT, //Hyphen acceptance 95 R_MM_ACCEPT, //Matrix match acceptance 96 R_QUALITY_ACCEPT, //Accept word in good quality doc 97 R_MINIMAL_REJ_ACCEPT //Accept EVERYTHING except tess failures 98 }; 99 100 /* REJECT MAP VALUES */ 101 102 #define MAP_ACCEPT '1' 103 #define MAP_REJECT_PERM '0' 104 #define MAP_REJECT_TEMP '2' 105 #define MAP_REJECT_POTENTIAL '3' 106 107 class REJ 108 { 109 BITS16 flags1; 110 BITS16 flags2; 111 set_flag(REJ_FLAGS rej_flag)112 void set_flag(REJ_FLAGS rej_flag) { 113 if (rej_flag < 16) 114 flags1.turn_on_bit (rej_flag); 115 else 116 flags2.turn_on_bit (rej_flag - 16); 117 } 118 119 BOOL8 rej_before_nn_accept(); 120 BOOL8 rej_between_nn_and_mm(); 121 BOOL8 rej_between_mm_and_quality_accept(); 122 BOOL8 rej_between_quality_and_minimal_rej_accept(); 123 BOOL8 rej_before_mm_accept(); 124 BOOL8 rej_before_quality_accept(); 125 126 public: REJ()127 REJ() { //constructor 128 } 129 REJ(const REJ & source)130 REJ( //classwise copy 131 const REJ &source) { 132 flags1 = source.flags1; 133 flags2 = source.flags2; 134 } 135 136 REJ & operator= ( //assign REJ 137 const REJ & source) { //from this 138 flags1 = source.flags1; 139 flags2 = source.flags2; 140 return *this; 141 } 142 flag(REJ_FLAGS rej_flag)143 BOOL8 flag(REJ_FLAGS rej_flag) { 144 if (rej_flag < 16) 145 return flags1.bit (rej_flag); 146 else 147 return flags2.bit (rej_flag - 16); 148 } 149 display_char()150 char display_char() { 151 if (perm_rejected ()) 152 return MAP_REJECT_PERM; 153 else if (accept_if_good_quality ()) 154 return MAP_REJECT_POTENTIAL; 155 else if (rejected ()) 156 return MAP_REJECT_TEMP; 157 else 158 return MAP_ACCEPT; 159 } 160 161 BOOL8 perm_rejected(); //Is char perm reject? 162 163 BOOL8 rejected(); //Is char rejected? 164 accepted()165 BOOL8 accepted() { //Is char accepted? 166 return !rejected (); 167 } 168 169 //potential rej? 
170 BOOL8 accept_if_good_quality(); 171 recoverable()172 BOOL8 recoverable() { 173 return (rejected () && !perm_rejected ()); 174 } 175 176 void setrej_tess_failure(); //Tess generated blank 177 void setrej_small_xht(); //Small xht char/wd 178 void setrej_edge_char(); //Close to image edge 179 void setrej_1Il_conflict(); //Initial reject map 180 void setrej_postNN_1Il(); //1Il after NN 181 void setrej_rej_cblob(); //Insert duff blob 182 void setrej_mm_reject(); //Matrix matcher 183 //Odd repeated char 184 void setrej_bad_repetition(); 185 void setrej_poor_match(); //Failed Rays heuristic 186 //TEMP reject_word 187 void setrej_not_tess_accepted(); 188 //TEMP reject_word 189 void setrej_contains_blanks(); 190 void setrej_bad_permuter(); //POTENTIAL reject_word 191 void setrej_hyphen(); //PostNN dubious hyph or . 192 void setrej_dubious(); //PostNN dubious limit 193 void setrej_no_alphanums(); //TEMP reject_word 194 void setrej_mostly_rej(); //TEMP reject_word 195 void setrej_xht_fixup(); //xht fixup 196 void setrej_bad_quality(); //TEMP reject_word 197 void setrej_doc_rej(); //TEMP reject_word 198 void setrej_block_rej(); //TEMP reject_word 199 void setrej_row_rej(); //TEMP reject_word 200 void setrej_unlv_rej(); //TEMP reject_word 201 void setrej_nn_accept(); //NN Flipped a char 202 void setrej_hyphen_accept(); //Good aspect ratio 203 void setrej_mm_accept(); //Matrix matcher 204 //Quality flip a char 205 void setrej_quality_accept(); 206 //Accept all except blank 207 void setrej_minimal_rej_accept(); 208 209 void full_print(FILE *fp); 210 }; 211 212 class REJMAP 213 { 214 REJ *ptr; //ptr to the chars 215 inT16 len; //Number of chars 216 217 public: REJMAP()218 REJMAP() { //constructor 219 ptr = NULL; 220 len = 0; 221 } 222 223 REJMAP( //classwise copy 224 const REJMAP &rejmap); 225 226 REJMAP & operator= ( //assign REJMAP 227 const REJMAP & source); //from this 228 ~REJMAP()229 ~REJMAP () { //destructor 230 if (ptr != NULL) 231 free_struct (ptr, len * sizeof (REJ), 
"REJ"); 232 } 233 234 void initialise( //Redefine map 235 inT16 length); 236 237 REJ & operator[]( //access function 238 inT16 index) const //map index 239 { 240 ASSERT_HOST (index < len); 241 return ptr[index]; //no bounds checks 242 } 243 length()244 inT32 length() const { //map length 245 return len; 246 } 247 248 inT16 accept_count(); //How many accepted? 249 reject_count()250 inT16 reject_count() { //How many rejects? 251 return len - accept_count (); 252 } 253 254 void remove_pos( //Cut out an element 255 inT16 pos); //element to remove 256 257 void print(FILE *fp); 258 259 void full_print(FILE *fp); 260 261 BOOL8 recoverable_rejects(); //Any non perm rejs? 262 263 BOOL8 quality_recoverable_rejects(); 264 //Any potential rejs? 265 266 void rej_word_small_xht(); //Reject whole word 267 //Reject whole word 268 void rej_word_tess_failure(); 269 void rej_word_not_tess_accepted(); 270 //Reject whole word 271 //Reject whole word 272 void rej_word_contains_blanks(); 273 //Reject whole word 274 void rej_word_bad_permuter(); 275 void rej_word_xht_fixup(); //Reject whole word 276 //Reject whole word 277 void rej_word_no_alphanums(); 278 void rej_word_mostly_rej(); //Reject whole word 279 void rej_word_bad_quality(); //Reject whole word 280 void rej_word_doc_rej(); //Reject whole word 281 void rej_word_block_rej(); //Reject whole word 282 void rej_word_row_rej(); //Reject whole word 283 }; 284 #endif 285