• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /**********************************************************************
2  * File:        rejctmap.h  (Formerly rejmap.h)
3  * Description: REJ and REJMAP class functions.
4  * Author:		Phil Cheatle
5  * Created:		Thu Jun  9 13:46:38 BST 1994
6  *
7  * (C) Copyright 1994, Hewlett-Packard Ltd.
8  ** Licensed under the Apache License, Version 2.0 (the "License");
9  ** you may not use this file except in compliance with the License.
10  ** You may obtain a copy of the License at
11  ** http://www.apache.org/licenses/LICENSE-2.0
12  ** Unless required by applicable law or agreed to in writing, software
13  ** distributed under the License is distributed on an "AS IS" BASIS,
14  ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15  ** See the License for the specific language governing permissions and
16  ** limitations under the License.
17  *
18 
19 This module may look unnecessarily verbose, but here's the philosophy...
20 
21 ALL processing of the reject map is done in this module. There are lots of
22 separate calls to set reject/accept flags. These have DELIBERATELY been kept
23 distinct so that this module can decide what to do.
24 
25 Basically, there is a flag for each sort of rejection or acceptance. This
26 provides a history of what has happened to EACH character.
27 
28 Determining whether a character is CURRENTLY rejected depends on implicit
29 understanding of the SEQUENCE of possible calls. The flags are defined and
30 grouped in the REJ_FLAGS enum. These groupings are used in determining a
31 character's CURRENT rejection status. Basically, a character is ACCEPTED if
32 
33     none of the permanent rej flags are set
34   AND (    the character has never been rejected
35       OR an accept flag is set which is LATER than the latest reject flag )
36 
37 IT IS FUNDAMENTAL THAT ANYONE HACKING THIS CODE UNDERSTANDS THE SIGNIFICANCE
38 OF THIS IMPLIED TEMPORAL ORDERING OF THE FLAGS!!!!
39 **********************************************************************/
40 
41 #ifndef           REJCTMAP_H
42 #define           REJCTMAP_H
43 
44 #ifdef __UNIX__
45 #include          <assert.h>
46 #endif
47 #include          "memry.h"
48 #include          "bits16.h"
49 #include                   "varable.h"
50 #include          "notdll.h"
51 
52 extern BOOL_VAR_H (rejword_only_set_if_accepted, TRUE,
53 "Mimic old reject_word");
54 extern BOOL_VAR_H (rejmap_allow_more_good_qual, FALSE,
55 "Use initial good qual setting");
56 extern BOOL_VAR_H (rej_use_1Il_rej, TRUE, "1Il rejection enabled");
57 
/**
 * One flag per kind of rejection or acceptance that can be recorded against
 * a character.
 *
 * Enumerators are numbered implicitly from 0 in declaration order. REJ keeps
 * values below 16 in its first BITS16 word and the remaining values, offset
 * by 16, in its second word, so R_XHT_FIXUP (value 16) is the first flag held
 * in the second word. Keep the declaration order: the groups below follow
 * the order in which the corresponding processing stages run.
 */
enum REJ_FLAGS
{
  /* Reject modes which are NEVER overridden */
  R_TESS_FAILURE,                // PERM Tess didnt classify
  R_SMALL_XHT,                   // PERM Xht too small
  R_EDGE_CHAR,                   // PERM Too close to edge of image
  R_1IL_CONFLICT,                // PERM 1Il confusion
  R_POSTNN_1IL,                  // PERM 1Il unrejected by NN
  R_REJ_CBLOB,                   // PERM Odd blob
  R_MM_REJECT,                   // PERM Matrix match rejection (m's)
  // NOTE(review): labelled TEMP although it sits in the never-overridden
  // group -- confirm against the definition of REJ::perm_rejected().
  R_BAD_REPETITION,              // TEMP Repeated char which doesn't match trend

  /* Initial reject modes (pre NN_ACCEPT) */
  R_POOR_MATCH,                  // TEMP Ray's original heuristic (Not used)
  R_NOT_TESS_ACCEPTED,           // TEMP Tess didnt accept WERD
  R_CONTAINS_BLANKS,             // TEMP Tess failed on other chs in WERD
  R_BAD_PERMUTER,                // POTENTIAL Bad permuter for WERD

  /* Reject modes generated after NN_ACCEPT but before MM_ACCEPT */
  R_HYPHEN,                      // TEMP Post NN dodgy hyphen or full stop
  R_DUBIOUS,                     // TEMP Post NN dodgy chars
  R_NO_ALPHANUMS,                // TEMP No alphanumerics in word after NN
  R_MOSTLY_REJ,                  // TEMP Most of word rejected so rej the rest
  R_XHT_FIXUP,                   // TEMP Xht tests unsure

  /* Reject modes generated after MM_ACCEPT but before QUALITY_ACCEPT */
  R_BAD_QUALITY,                 // TEMP Quality metrics bad for WERD

  /* Reject modes generated after QUALITY_ACCEPT but before MINIMAL_REJ accept */
  R_DOC_REJ,                     // TEMP Document rejection
  R_BLOCK_REJ,                   // TEMP Block rejection
  R_ROW_REJ,                     // TEMP Row rejection
  R_UNLV_REJ,                    // TEMP ~ turned to - or ^ turned to space

  /* Accept modes which occur in between the above rejection groups */
  R_NN_ACCEPT,                   //NN acceptance
  R_HYPHEN_ACCEPT,               //Hyphen acceptance
  R_MM_ACCEPT,                   //Matrix match acceptance
  R_QUALITY_ACCEPT,              //Accept word in good quality doc
  R_MINIMAL_REJ_ACCEPT           //Accept EVERYTHING except tess failures
};
99 
/* REJECT MAP VALUES: the characters returned by REJ::display_char() */

#define           MAP_ACCEPT '1'            /* none of the reject tests fired */
#define           MAP_REJECT_PERM '0'       /* perm_rejected() is true */
#define           MAP_REJECT_TEMP '2'       /* rejected() is true */
#define           MAP_REJECT_POTENTIAL '3'  /* accept_if_good_quality() is true */
106 
107 class REJ
108 {
109   BITS16 flags1;
110   BITS16 flags2;
111 
set_flag(REJ_FLAGS rej_flag)112   void set_flag(REJ_FLAGS rej_flag) {
113     if (rej_flag < 16)
114       flags1.turn_on_bit (rej_flag);
115     else
116       flags2.turn_on_bit (rej_flag - 16);
117   }
118 
119   BOOL8 rej_before_nn_accept();
120   BOOL8 rej_between_nn_and_mm();
121   BOOL8 rej_between_mm_and_quality_accept();
122   BOOL8 rej_between_quality_and_minimal_rej_accept();
123   BOOL8 rej_before_mm_accept();
124   BOOL8 rej_before_quality_accept();
125 
126   public:
REJ()127     REJ() {  //constructor
128     }
129 
REJ(const REJ & source)130     REJ(  //classwise copy
131         const REJ &source) {
132       flags1 = source.flags1;
133       flags2 = source.flags2;
134     }
135 
136     REJ & operator= (            //assign REJ
137     const REJ & source) {        //from this
138       flags1 = source.flags1;
139       flags2 = source.flags2;
140       return *this;
141     }
142 
flag(REJ_FLAGS rej_flag)143     BOOL8 flag(REJ_FLAGS rej_flag) {
144       if (rej_flag < 16)
145         return flags1.bit (rej_flag);
146       else
147         return flags2.bit (rej_flag - 16);
148     }
149 
display_char()150     char display_char() {
151       if (perm_rejected ())
152         return MAP_REJECT_PERM;
153       else if (accept_if_good_quality ())
154         return MAP_REJECT_POTENTIAL;
155       else if (rejected ())
156         return MAP_REJECT_TEMP;
157       else
158         return MAP_ACCEPT;
159     }
160 
161     BOOL8 perm_rejected();  //Is char perm reject?
162 
163     BOOL8 rejected();  //Is char rejected?
164 
accepted()165     BOOL8 accepted() {  //Is char accepted?
166       return !rejected ();
167     }
168 
169                                  //potential rej?
170     BOOL8 accept_if_good_quality();
171 
recoverable()172     BOOL8 recoverable() {
173       return (rejected () && !perm_rejected ());
174     }
175 
176     void setrej_tess_failure();  //Tess generated blank
177     void setrej_small_xht();  //Small xht char/wd
178     void setrej_edge_char();  //Close to image edge
179     void setrej_1Il_conflict();  //Initial reject map
180     void setrej_postNN_1Il();  //1Il after NN
181     void setrej_rej_cblob();  //Insert duff blob
182     void setrej_mm_reject();  //Matrix matcher
183                                  //Odd repeated char
184     void setrej_bad_repetition();
185     void setrej_poor_match();  //Failed Rays heuristic
186                                  //TEMP reject_word
187     void setrej_not_tess_accepted();
188                                  //TEMP reject_word
189     void setrej_contains_blanks();
190     void setrej_bad_permuter();  //POTENTIAL reject_word
191     void setrej_hyphen();  //PostNN dubious hyph or .
192     void setrej_dubious();  //PostNN dubious limit
193     void setrej_no_alphanums();  //TEMP reject_word
194     void setrej_mostly_rej();  //TEMP reject_word
195     void setrej_xht_fixup();  //xht fixup
196     void setrej_bad_quality();  //TEMP reject_word
197     void setrej_doc_rej();  //TEMP reject_word
198     void setrej_block_rej();  //TEMP reject_word
199     void setrej_row_rej();  //TEMP reject_word
200     void setrej_unlv_rej();  //TEMP reject_word
201     void setrej_nn_accept();  //NN Flipped a char
202     void setrej_hyphen_accept();  //Good aspect ratio
203     void setrej_mm_accept();  //Matrix matcher
204                                  //Quality flip a char
205     void setrej_quality_accept();
206                                  //Accept all except blank
207     void setrej_minimal_rej_accept();
208 
209     void full_print(FILE *fp);
210 };
211 
212 class REJMAP
213 {
214   REJ *ptr;                      //ptr to the chars
215   inT16 len;                     //Number of chars
216 
217   public:
REJMAP()218     REJMAP() {  //constructor
219       ptr = NULL;
220       len = 0;
221     }
222 
223     REJMAP(  //classwise copy
224            const REJMAP &rejmap);
225 
226     REJMAP & operator= (         //assign REJMAP
227       const REJMAP & source);    //from this
228 
~REJMAP()229     ~REJMAP () {                 //destructor
230       if (ptr != NULL)
231         free_struct (ptr, len * sizeof (REJ), "REJ");
232     }
233 
234     void initialise(  //Redefine map
235                     inT16 length);
236 
237     REJ & operator[](            //access function
238       inT16 index) const         //map index
239     {
240       ASSERT_HOST (index < len);
241       return ptr[index];         //no bounds checks
242     }
243 
length()244     inT32 length() const {  //map length
245       return len;
246     }
247 
248     inT16 accept_count();  //How many accepted?
249 
reject_count()250     inT16 reject_count() {  //How many rejects?
251       return len - accept_count ();
252     }
253 
254     void remove_pos(             //Cut out an element
255                     inT16 pos);  //element to remove
256 
257     void print(FILE *fp);
258 
259     void full_print(FILE *fp);
260 
261     BOOL8 recoverable_rejects();  //Any non perm rejs?
262 
263     BOOL8 quality_recoverable_rejects();
264     //Any potential rejs?
265 
266     void rej_word_small_xht();  //Reject whole word
267                                  //Reject whole word
268     void rej_word_tess_failure();
269     void rej_word_not_tess_accepted();
270     //Reject whole word
271                                  //Reject whole word
272     void rej_word_contains_blanks();
273                                  //Reject whole word
274     void rej_word_bad_permuter();
275     void rej_word_xht_fixup();  //Reject whole word
276                                  //Reject whole word
277     void rej_word_no_alphanums();
278     void rej_word_mostly_rej();  //Reject whole word
279     void rej_word_bad_quality();  //Reject whole word
280     void rej_word_doc_rej();  //Reject whole word
281     void rej_word_block_rej();  //Reject whole word
282     void rej_word_row_rej();  //Reject whole word
283 };
284 #endif
285